diff --git a/lib/Makefile.am b/lib/Makefile.am index 8ba657aac..9a5a70e0d 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -21,7 +21,7 @@ # the same distribution terms as the rest of that program. # # Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=tools --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files argp error gethrxtime mkstemp progname sys_wait +# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=tools --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files argmatch argp error gethrxtime isatty mkstemp progname sys_wait AUTOMAKE_OPTIONS = 1.9.6 gnits @@ -87,6 +87,14 @@ EXTRA_DIST += alloca.in.h ## end gnulib module alloca-opt +## begin gnulib module argmatch + +libgnu_la_SOURCES += argmatch.c + +EXTRA_DIST += argmatch.h + +## end gnulib module argmatch + ## begin gnulib module argp libgnu_la_SOURCES += argp.h argp-ba.c argp-eexst.c \ @@ -96,6 +104,69 @@ libgnu_la_SOURCES += argp.h argp-ba.c argp-eexst.c \ ## end gnulib module argp +## begin gnulib module c-ctype + +libgnu_la_SOURCES += c-ctype.h c-ctype.c + +## end gnulib module c-ctype + +## begin gnulib module c-strcase + +libgnu_la_SOURCES += c-strcase.h c-strcasecmp.c c-strncasecmp.c + +## end gnulib module c-strcase + +## begin gnulib module c-strcaseeq + + +EXTRA_DIST += c-strcaseeq.h + +## end gnulib module c-strcaseeq + +## begin gnulib module configmake + +# Listed in the same order as the GNU makefile conventions, and +# provided by autoconf 2.59c+. +# The Automake-defined pkg* macros are appended, in the order +# listed in the Automake 1.10a+ documentation. +configmake.h: Makefile + $(AM_V_GEN)rm -f $@-t && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + echo '#define PREFIX "$(prefix)"'; \ + echo '#define EXEC_PREFIX "$(exec_prefix)"'; \ + echo '#define BINDIR "$(bindir)"'; \ + echo '#define SBINDIR "$(sbindir)"'; \ + echo '#define LIBEXECDIR "$(libexecdir)"'; \ + echo '#define DATAROOTDIR "$(datarootdir)"'; \ + echo '#define DATADIR "$(datadir)"'; \ + echo '#define SYSCONFDIR "$(sysconfdir)"'; \ + echo '#define SHAREDSTATEDIR "$(sharedstatedir)"'; \ + echo '#define LOCALSTATEDIR "$(localstatedir)"'; \ + echo '#define INCLUDEDIR "$(includedir)"'; \ + echo '#define OLDINCLUDEDIR "$(oldincludedir)"'; \ + echo '#define DOCDIR "$(docdir)"'; \ + echo '#define INFODIR "$(infodir)"'; \ + echo '#define HTMLDIR "$(htmldir)"'; \ + echo '#define DVIDIR "$(dvidir)"'; \ + echo '#define PDFDIR "$(pdfdir)"'; \ + echo '#define PSDIR "$(psdir)"'; \ + echo '#define LIBDIR "$(libdir)"'; \ + echo '#define LISPDIR "$(lispdir)"'; \ + echo '#define LOCALEDIR "$(localedir)"'; \ + echo '#define MANDIR "$(mandir)"'; \ + echo '#define MANEXT "$(manext)"'; \ + echo '#define PKGDATADIR "$(pkgdatadir)"'; \ + echo '#define PKGINCLUDEDIR "$(pkgincludedir)"'; \ + echo '#define PKGLIBDIR "$(pkglibdir)"'; \ + echo '#define PKGLIBEXECDIR "$(pkglibexecdir)"'; \ + } | sed '/""/d' > $@-t && \ + mv -f $@-t $@ + +BUILT_SOURCES += configmake.h +CLEANFILES += configmake.h configmake.h-t + +## end gnulib module configmake + ## begin gnulib module dirname-lgpl libgnu_la_SOURCES += dirname-lgpl.c basename-lgpl.c stripslash.c @@ -154,6 +225,14 @@ EXTRA_libgnu_la_SOURCES += error.c ## end gnulib module error +## begin gnulib module exitfail + +libgnu_la_SOURCES += exitfail.c + +EXTRA_DIST += exitfail.h + +## end gnulib module exitfail + ## begin gnulib module fcntl-h BUILT_SOURCES += fcntl.h @@ -283,6 +362,89 @@ EXTRA_DIST += intprops.h ## end gnulib module intprops +## begin gnulib module isatty + + +EXTRA_DIST += isatty.c + +EXTRA_libgnu_la_SOURCES += isatty.c + +## end gnulib module isatty + +## begin gnulib module localcharset + +libgnu_la_SOURCES += localcharset.h localcharset.c + +# We need the following in order to install a simple file in $(libdir) +# which is shared with other installed packages. We use a list of referencing +# packages so that "make uninstall" will remove the file if and only if it +# is not used by another installed package. +# On systems with glibc-2.1 or newer, the file is redundant, therefore we +# avoid installing it. + +all-local: charset.alias ref-add.sed ref-del.sed + +charset_alias = $(DESTDIR)$(libdir)/charset.alias +charset_tmp = $(DESTDIR)$(libdir)/charset.tmp +install-exec-local: install-exec-localcharset +install-exec-localcharset: all-local + if test $(GLIBC21) = no; then \ + case '$(host_os)' in \ + darwin[56]*) \ + need_charset_alias=true ;; \ + darwin* | cygwin* | mingw* | pw32* | cegcc*) \ + need_charset_alias=false ;; \ + *) \ + need_charset_alias=true ;; \ + esac ; \ + else \ + need_charset_alias=false ; \ + fi ; \ + if $$need_charset_alias; then \ + $(mkinstalldirs) $(DESTDIR)$(libdir) ; \ + fi ; \ + if test -f $(charset_alias); then \ + sed -f ref-add.sed $(charset_alias) > $(charset_tmp) ; \ + $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \ + rm -f $(charset_tmp) ; \ + else \ + if $$need_charset_alias; then \ + sed -f ref-add.sed charset.alias > $(charset_tmp) ; \ + $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \ + rm -f $(charset_tmp) ; \ + fi ; \ + fi + +uninstall-local: uninstall-localcharset +uninstall-localcharset: all-local + if test -f $(charset_alias); then \ + sed -f ref-del.sed $(charset_alias) > $(charset_tmp); \ + if grep '^# Packages using this file: $$' $(charset_tmp) \ + > /dev/null; then \ + rm -f $(charset_alias); \ + else \ + $(INSTALL_DATA) $(charset_tmp) $(charset_alias); \ + fi; \ + rm -f $(charset_tmp); \ + fi + +charset.alias: config.charset + $(AM_V_GEN)rm -f t-$@ $@ && \ + $(SHELL) $(srcdir)/config.charset '$(host)' > t-$@ && \ + mv t-$@ $@ + +SUFFIXES += .sed .sin +.sin.sed: + $(AM_V_GEN)rm -f t-$@ $@ && \ + sed -e '/^#/d' -e 's/@''PACKAGE''@/$(PACKAGE)/g' $< > t-$@ && \ + mv t-$@ $@ + +CLEANFILES += charset.alias ref-add.sed ref-del.sed + +EXTRA_DIST += config.charset ref-add.sin ref-del.sin + +## end gnulib module localcharset + ## begin gnulib module lstat @@ -310,6 +472,24 @@ EXTRA_libgnu_la_SOURCES += malloc.c ## end gnulib module malloc-posix +## begin gnulib module mbrtowc + + +EXTRA_DIST += mbrtowc.c + +EXTRA_libgnu_la_SOURCES += mbrtowc.c + +## end gnulib module mbrtowc + +## begin gnulib module mbsinit + + +EXTRA_DIST += mbsinit.c + +EXTRA_libgnu_la_SOURCES += mbsinit.c + +## end gnulib module mbsinit + ## begin gnulib module memchr @@ -368,6 +548,21 @@ libgnu_la_SOURCES += progname.h progname.c ## end gnulib module progname +## begin gnulib module quote + + +EXTRA_DIST += quote.h + +## end gnulib module quote + +## begin gnulib module quotearg + +libgnu_la_SOURCES += quotearg.c + +EXTRA_DIST += quote.h quotearg.h + +## end gnulib module quotearg + ## begin gnulib module rawmemchr @@ -866,6 +1061,13 @@ EXTRA_libgnu_la_SOURCES += strchrnul.c ## end gnulib module strchrnul +## begin gnulib module streq + + +EXTRA_DIST += streq.h + +## end gnulib module streq + ## begin gnulib module strerror @@ -1566,6 +1768,67 @@ EXTRA_DIST += wchar.in.h ## end gnulib module wchar +## begin gnulib module wctype-h + +BUILT_SOURCES += wctype.h +libgnu_la_SOURCES += wctype-h.c + +# We need the following in order to create when the system +# doesn't have one that works with the given compiler. +wctype.h: wctype.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H) + $(AM_V_GEN)rm -f $@-t $@ && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + sed -e 's|@''GUARD_PREFIX''@|GL|g' \ + -e 's/@''HAVE_WCTYPE_H''@/$(HAVE_WCTYPE_H)/g' \ + -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ + -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ + -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ + -e 's|@''NEXT_WCTYPE_H''@|$(NEXT_WCTYPE_H)|g' \ + -e 's/@''GNULIB_ISWBLANK''@/$(GNULIB_ISWBLANK)/g' \ + -e 's/@''GNULIB_WCTYPE''@/$(GNULIB_WCTYPE)/g' \ + -e 's/@''GNULIB_ISWCTYPE''@/$(GNULIB_ISWCTYPE)/g' \ + -e 's/@''GNULIB_WCTRANS''@/$(GNULIB_WCTRANS)/g' \ + -e 's/@''GNULIB_TOWCTRANS''@/$(GNULIB_TOWCTRANS)/g' \ + -e 's/@''HAVE_ISWBLANK''@/$(HAVE_ISWBLANK)/g' \ + -e 's/@''HAVE_ISWCNTRL''@/$(HAVE_ISWCNTRL)/g' \ + -e 's/@''HAVE_WCTYPE_T''@/$(HAVE_WCTYPE_T)/g' \ + -e 's/@''HAVE_WCTRANS_T''@/$(HAVE_WCTRANS_T)/g' \ + -e 's/@''HAVE_WINT_T''@/$(HAVE_WINT_T)/g' \ + -e 's/@''REPLACE_ISWBLANK''@/$(REPLACE_ISWBLANK)/g' \ + -e 's/@''REPLACE_ISWCNTRL''@/$(REPLACE_ISWCNTRL)/g' \ + -e 's/@''REPLACE_TOWLOWER''@/$(REPLACE_TOWLOWER)/g' \ + -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ + -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ + < $(srcdir)/wctype.in.h; \ + } > $@-t && \ + mv $@-t $@ +MOSTLYCLEANFILES += wctype.h wctype.h-t + +EXTRA_DIST += wctype.in.h + +## end gnulib module wctype-h + +## begin gnulib module xalloc + +libgnu_la_SOURCES += xmalloc.c + +EXTRA_DIST += xalloc.h + +## end gnulib module xalloc + +## begin gnulib module xalloc-die + +libgnu_la_SOURCES += xalloc-die.c + +## end gnulib module xalloc-die + +## begin gnulib module xalloc-oversized + + +EXTRA_DIST += xalloc-oversized.h + +## end gnulib module xalloc-oversized + ## begin gnulib module xsize libgnu_la_SOURCES += xsize.h xsize.c diff --git a/lib/argmatch.c b/lib/argmatch.c new file mode 100644 index 000000000..9125e2af0 --- /dev/null +++ b/lib/argmatch.c @@ -0,0 +1,277 @@ +/* argmatch.c -- find a match for a string in an array + + Copyright (C) 1990, 1998-1999, 2001-2007, 2009-2013 Free Software + Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie + Modified by Akim Demaille */ + +#include + +/* Specification. */ +#include "argmatch.h" + +#include +#include +#include +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +#include "error.h" +#include "quotearg.h" +#include "quote.h" + +#if USE_UNLOCKED_IO +# include "unlocked-io.h" +#endif + +/* When reporting an invalid argument, show nonprinting characters + by using the quoting style ARGMATCH_QUOTING_STYLE. Do not use + literal_quoting_style. */ +#ifndef ARGMATCH_QUOTING_STYLE +# define ARGMATCH_QUOTING_STYLE locale_quoting_style +#endif + +/* Non failing version of argmatch call this function after failing. */ +#ifndef ARGMATCH_DIE +# include "exitfail.h" +# define ARGMATCH_DIE exit (exit_failure) +#endif + +#ifdef ARGMATCH_DIE_DECL +ARGMATCH_DIE_DECL; +#endif + +static void +__argmatch_die (void) +{ + ARGMATCH_DIE; +} + +/* Used by XARGMATCH and XARGCASEMATCH. See description in argmatch.h. + Default to __argmatch_die, but allow caller to change this at run-time. */ +argmatch_exit_fn argmatch_die = __argmatch_die; + + +/* If ARG is an unambiguous match for an element of the + NULL-terminated array ARGLIST, return the index in ARGLIST + of the matched element, else -1 if it does not match any element + or -2 if it is ambiguous (is a prefix of more than one element). + + If VALLIST is none null, use it to resolve ambiguities limited to + synonyms, i.e., for + "yes", "yop" -> 0 + "no", "nope" -> 1 + "y" is a valid argument, for 0, and "n" for 1. */ + +ptrdiff_t +argmatch (const char *arg, const char *const *arglist, + const char *vallist, size_t valsize) +{ + size_t i; /* Temporary index in ARGLIST. */ + size_t arglen; /* Length of ARG. */ + ptrdiff_t matchind = -1; /* Index of first nonexact match. */ + bool ambiguous = false; /* If true, multiple nonexact match(es). */ + + arglen = strlen (arg); + + /* Test all elements for either exact match or abbreviated matches. */ + for (i = 0; arglist[i]; i++) + { + if (!strncmp (arglist[i], arg, arglen)) + { + if (strlen (arglist[i]) == arglen) + /* Exact match found. */ + return i; + else if (matchind == -1) + /* First nonexact match found. */ + matchind = i; + else + { + /* Second nonexact match found. */ + if (vallist == NULL + || memcmp (vallist + valsize * matchind, + vallist + valsize * i, valsize)) + { + /* There is a real ambiguity, or we could not + disambiguate. */ + ambiguous = true; + } + } + } + } + if (ambiguous) + return -2; + else + return matchind; +} + +/* Error reporting for argmatch. + CONTEXT is a description of the type of entity that was being matched. + VALUE is the invalid value that was given. + PROBLEM is the return value from argmatch. */ + +void +argmatch_invalid (const char *context, const char *value, ptrdiff_t problem) +{ + char const *format = (problem == -1 + ? _("invalid argument %s for %s") + : _("ambiguous argument %s for %s")); + + error (0, 0, format, quotearg_n_style (0, ARGMATCH_QUOTING_STYLE, value), + quote_n (1, context)); +} + +/* List the valid arguments for argmatch. + ARGLIST is the same as in argmatch. + VALLIST is a pointer to an array of values. + VALSIZE is the size of the elements of VALLIST */ +void +argmatch_valid (const char *const *arglist, + const char *vallist, size_t valsize) +{ + size_t i; + const char *last_val = NULL; + + /* We try to put synonyms on the same line. The assumption is that + synonyms follow each other */ + fputs (_("Valid arguments are:"), stderr); + for (i = 0; arglist[i]; i++) + if ((i == 0) + || memcmp (last_val, vallist + valsize * i, valsize)) + { + fprintf (stderr, "\n - %s", quote (arglist[i])); + last_val = vallist + valsize * i; + } + else + { + fprintf (stderr, ", %s", quote (arglist[i])); + } + putc ('\n', stderr); +} + +/* Never failing versions of the previous functions. + + CONTEXT is the context for which argmatch is called (e.g., + "--version-control", or "$VERSION_CONTROL" etc.). Upon failure, + calls the (supposed never to return) function EXIT_FN. */ + +ptrdiff_t +__xargmatch_internal (const char *context, + const char *arg, const char *const *arglist, + const char *vallist, size_t valsize, + argmatch_exit_fn exit_fn) +{ + ptrdiff_t res = argmatch (arg, arglist, vallist, valsize); + if (res >= 0) + /* Success. */ + return res; + + /* We failed. Explain why. */ + argmatch_invalid (context, arg, res); + argmatch_valid (arglist, vallist, valsize); + (*exit_fn) (); + + return -1; /* To please the compilers. */ +} + +/* Look for VALUE in VALLIST, an array of objects of size VALSIZE and + return the first corresponding argument in ARGLIST */ +const char * +argmatch_to_argument (const char *value, + const char *const *arglist, + const char *vallist, size_t valsize) +{ + size_t i; + + for (i = 0; arglist[i]; i++) + if (!memcmp (value, vallist + valsize * i, valsize)) + return arglist[i]; + return NULL; +} + +#ifdef TEST +/* + * Based on "getversion.c" by David MacKenzie + */ +char *program_name; + +/* When to make backup files. */ +enum backup_type +{ + /* Never make backups. */ + no_backups, + + /* Make simple backups of every file. */ + simple_backups, + + /* Make numbered backups of files that already have numbered backups, + and simple backups of the others. */ + numbered_existing_backups, + + /* Make numbered backups of every file. */ + numbered_backups +}; + +/* Two tables describing arguments (keys) and their corresponding + values */ +static const char *const backup_args[] = +{ + "no", "none", "off", + "simple", "never", + "existing", "nil", + "numbered", "t", + 0 +}; + +static const enum backup_type backup_vals[] = +{ + no_backups, no_backups, no_backups, + simple_backups, simple_backups, + numbered_existing_backups, numbered_existing_backups, + numbered_backups, numbered_backups +}; + +int +main (int argc, const char *const *argv) +{ + const char *cp; + enum backup_type backup_type = no_backups; + + program_name = (char *) argv[0]; + + if (argc > 2) + { + fprintf (stderr, "Usage: %s [VERSION_CONTROL]\n", program_name); + exit (1); + } + + if ((cp = getenv ("VERSION_CONTROL"))) + backup_type = XARGMATCH ("$VERSION_CONTROL", cp, + backup_args, backup_vals); + + if (argc == 2) + backup_type = XARGMATCH (program_name, argv[1], + backup_args, backup_vals); + + printf ("The version control is '%s'\n", + ARGMATCH_TO_ARGUMENT (backup_type, backup_args, backup_vals)); + + return 0; +} +#endif diff --git a/lib/argmatch.h b/lib/argmatch.h new file mode 100644 index 000000000..e4c802714 --- /dev/null +++ b/lib/argmatch.h @@ -0,0 +1,111 @@ +/* argmatch.h -- definitions and prototypes for argmatch.c + + Copyright (C) 1990, 1998-1999, 2001-2002, 2004-2005, 2009-2013 Free Software + Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie + Modified by Akim Demaille */ + +#ifndef ARGMATCH_H_ +# define ARGMATCH_H_ 1 + +# include + +# include "verify.h" + +#ifdef __cplusplus +extern "C" { +#endif + +# define ARRAY_CARDINALITY(Array) (sizeof (Array) / sizeof *(Array)) + +/* Assert there are as many real arguments as there are values + (argument list ends with a NULL guard). */ + +# define ARGMATCH_VERIFY(Arglist, Vallist) \ + verify (ARRAY_CARDINALITY (Arglist) == ARRAY_CARDINALITY (Vallist) + 1) + +/* Return the index of the element of ARGLIST (NULL terminated) that + matches with ARG. If VALLIST is not NULL, then use it to resolve + false ambiguities (i.e., different matches of ARG but corresponding + to the same values in VALLIST). */ + +ptrdiff_t argmatch (char const *arg, char const *const *arglist, + char const *vallist, size_t valsize) _GL_ATTRIBUTE_PURE; + +# define ARGMATCH(Arg, Arglist, Vallist) \ + argmatch (Arg, Arglist, (char const *) (Vallist), sizeof *(Vallist)) + +/* xargmatch calls this function when it fails. This function should not + return. By default, this is a function that calls ARGMATCH_DIE which + in turn defaults to 'exit (exit_failure)'. */ +typedef void (*argmatch_exit_fn) (void); +extern argmatch_exit_fn argmatch_die; + +/* Report on stderr why argmatch failed. Report correct values. */ + +void argmatch_invalid (char const *context, char const *value, + ptrdiff_t problem); + +/* Left for compatibility with the old name invalid_arg */ + +# define invalid_arg(Context, Value, Problem) \ + argmatch_invalid (Context, Value, Problem) + + + +/* Report on stderr the list of possible arguments. */ + +void argmatch_valid (char const *const *arglist, + char const *vallist, size_t valsize); + +# define ARGMATCH_VALID(Arglist, Vallist) \ + argmatch_valid (Arglist, (char const *) (Vallist), sizeof *(Vallist)) + + + +/* Same as argmatch, but upon failure, report an explanation of the + failure, and exit using the function EXIT_FN. */ + +ptrdiff_t __xargmatch_internal (char const *context, + char const *arg, char const *const *arglist, + char const *vallist, size_t valsize, + argmatch_exit_fn exit_fn); + +/* Programmer friendly interface to __xargmatch_internal. */ + +# define XARGMATCH(Context, Arg, Arglist, Vallist) \ + ((Vallist) [__xargmatch_internal (Context, Arg, Arglist, \ + (char const *) (Vallist), \ + sizeof *(Vallist), \ + argmatch_die)]) + +/* Convert a value into a corresponding argument. */ + +char const *argmatch_to_argument (char const *value, + char const *const *arglist, + char const *vallist, size_t valsize) + _GL_ATTRIBUTE_PURE; + +# define ARGMATCH_TO_ARGUMENT(Value, Arglist, Vallist) \ + argmatch_to_argument (Value, Arglist, \ + (char const *) (Vallist), sizeof *(Vallist)) + +#ifdef __cplusplus +} +#endif + +#endif /* ARGMATCH_H_ */ diff --git a/lib/c-ctype.c b/lib/c-ctype.c new file mode 100644 index 000000000..752d2e32a --- /dev/null +++ b/lib/c-ctype.c @@ -0,0 +1,395 @@ +/* Character handling in C locale. + + Copyright 2000-2003, 2006, 2009-2013 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, see . */ + +#include + +/* Specification. */ +#define NO_C_CTYPE_MACROS +#include "c-ctype.h" + +/* The function isascii is not locale dependent. Its use in EBCDIC is + questionable. */ +bool +c_isascii (int c) +{ + return (c >= 0x00 && c <= 0x7f); +} + +bool +c_isalnum (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')); +#else + return ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z')); +#endif +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isalpha (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'); +#else + return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); +#endif +#else + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isblank (int c) +{ + return (c == ' ' || c == '\t'); +} + +bool +c_iscntrl (int c) +{ +#if C_CTYPE_ASCII + return ((c & ~0x1f) == 0 || c == 0x7f); +#else + switch (c) + { + case ' ': case '!': case '"': case '#': case '$': case '%': + case '&': case '\'': case '(': case ')': case '*': case '+': + case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 0; + default: + return 1; + } +#endif +} + +bool +c_isdigit (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS + return (c >= '0' && c <= '9'); +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + return 1; + default: + return 0; + } +#endif +} + +bool +c_islower (int c) +{ +#if C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'a' && c <= 'z'); +#else + switch (c) + { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isgraph (int c) +{ +#if C_CTYPE_ASCII + return (c >= '!' && c <= '~'); +#else + switch (c) + { + case '!': case '"': case '#': case '$': case '%': case '&': + case '\'': case '(': case ')': case '*': case '+': case ',': + case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isprint (int c) +{ +#if C_CTYPE_ASCII + return (c >= ' ' && c <= '~'); +#else + switch (c) + { + case ' ': case '!': case '"': case '#': case '$': case '%': + case '&': case '\'': case '(': case ')': case '*': case '+': + case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_ispunct (int c) +{ +#if C_CTYPE_ASCII + return ((c >= '!' && c <= '~') + && !((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'))); +#else + switch (c) + { + case '!': case '"': case '#': case '$': case '%': case '&': + case '\'': case '(': case ')': case '*': case '+': case ',': + case '-': case '.': case '/': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case '[': case '\\': case ']': case '^': case '_': case '`': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isspace (int c) +{ + return (c == ' ' || c == '\t' + || c == '\n' || c == '\v' || c == '\f' || c == '\r'); +} + +bool +c_isupper (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE + return (c >= 'A' && c <= 'Z'); +#else + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isxdigit (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F')); +#else + return ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'F') + || (c >= 'a' && c <= 'f')); +#endif +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + return 1; + default: + return 0; + } +#endif +} + +int +c_tolower (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c); +#else + switch (c) + { + case 'A': return 'a'; + case 'B': return 'b'; + case 'C': return 'c'; + case 'D': return 'd'; + case 'E': return 'e'; + case 'F': return 'f'; + case 'G': return 'g'; + case 'H': return 'h'; + case 'I': return 'i'; + case 'J': return 'j'; + case 'K': return 'k'; + case 'L': return 'l'; + case 'M': return 'm'; + case 'N': return 'n'; + case 'O': return 'o'; + case 'P': return 'p'; + case 'Q': return 'q'; + case 'R': return 'r'; + case 'S': return 's'; + case 'T': return 't'; + case 'U': return 'u'; + case 'V': return 'v'; + case 'W': return 'w'; + case 'X': return 'x'; + case 'Y': return 'y'; + case 'Z': return 'z'; + default: return c; + } +#endif +} + +int +c_toupper (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); +#else + switch (c) + { + case 'a': return 'A'; + case 'b': return 'B'; + case 'c': return 'C'; + case 'd': return 'D'; + case 'e': return 'E'; + case 'f': return 'F'; + case 'g': return 'G'; + case 'h': return 'H'; + case 'i': return 'I'; + case 'j': return 'J'; + case 'k': return 'K'; + case 'l': return 'L'; + case 'm': return 'M'; + case 'n': return 'N'; + case 'o': return 'O'; + case 'p': return 'P'; + case 'q': return 'Q'; + case 'r': return 'R'; + case 's': return 'S'; + case 't': return 'T'; + case 'u': return 'U'; + case 'v': return 'V'; + case 'w': return 'W'; + case 'x': return 'X'; + case 'y': return 'Y'; + case 'z': return 'Z'; + default: return c; + } +#endif +} diff --git a/lib/c-ctype.h b/lib/c-ctype.h new file mode 100644 index 000000000..3a66440ae --- /dev/null +++ b/lib/c-ctype.h @@ -0,0 +1,294 @@ +/* Character handling in C locale. + + These functions work like the corresponding functions in , + except that they have the C (POSIX) locale hardwired, whereas the + functions' behaviour depends on the current locale set via + setlocale. + + Copyright (C) 2000-2003, 2006, 2008-2013 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, see . */ + +#ifndef C_CTYPE_H +#define C_CTYPE_H + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* The functions defined in this file assume the "C" locale and a character + set without diacritics (ASCII-US or EBCDIC-US or something like that). + Even if the "C" locale on a particular system is an extension of the ASCII + character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it + is ISO-8859-1), the functions in this file recognize only the ASCII + characters. */ + + +/* Check whether the ASCII optimizations apply. */ + +/* ANSI C89 (and ISO C99 5.2.1.3 too) already guarantees that + '0', '1', ..., '9' have consecutive integer values. */ +#define C_CTYPE_CONSECUTIVE_DIGITS 1 + +#if ('A' <= 'Z') \ + && ('A' + 1 == 'B') && ('B' + 1 == 'C') && ('C' + 1 == 'D') \ + && ('D' + 1 == 'E') && ('E' + 1 == 'F') && ('F' + 1 == 'G') \ + && ('G' + 1 == 'H') && ('H' + 1 == 'I') && ('I' + 1 == 'J') \ + && ('J' + 1 == 'K') && ('K' + 1 == 'L') && ('L' + 1 == 'M') \ + && ('M' + 1 == 'N') && ('N' + 1 == 'O') && ('O' + 1 == 'P') \ + && ('P' + 1 == 'Q') && ('Q' + 1 == 'R') && ('R' + 1 == 'S') \ + && ('S' + 1 == 'T') && ('T' + 1 == 'U') && ('U' + 1 == 'V') \ + && ('V' + 1 == 'W') && ('W' + 1 == 'X') && ('X' + 1 == 'Y') \ + && ('Y' + 1 == 'Z') +#define C_CTYPE_CONSECUTIVE_UPPERCASE 1 +#endif + +#if ('a' <= 'z') \ + && ('a' + 1 == 'b') && ('b' + 1 == 'c') && ('c' + 1 == 'd') \ + && ('d' + 1 == 'e') && ('e' + 1 == 'f') && ('f' + 1 == 'g') \ + && ('g' + 1 == 'h') && ('h' + 1 == 'i') && ('i' + 1 == 'j') \ + && ('j' + 1 == 'k') && ('k' + 1 == 'l') && ('l' + 1 == 'm') \ + && ('m' + 1 == 'n') && ('n' + 1 == 'o') && ('o' + 1 == 'p') \ + && ('p' + 1 == 'q') && ('q' + 1 == 'r') && ('r' + 1 == 's') \ + && ('s' + 1 == 't') && ('t' + 1 == 'u') && ('u' + 1 == 'v') \ + && ('v' + 1 == 'w') && ('w' + 1 == 'x') && ('x' + 1 == 'y') \ + && ('y' + 1 == 'z') +#define C_CTYPE_CONSECUTIVE_LOWERCASE 1 +#endif + +#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126) +/* The character set is ASCII or one of its variants or extensions, not EBCDIC. + Testing the value of '\n' and '\r' is not relevant. */ +#define C_CTYPE_ASCII 1 +#endif + + +/* Function declarations. */ + +/* Unlike the functions in , which require an argument in the range + of the 'unsigned char' type, the functions here operate on values that are + in the 'unsigned char' range or in the 'char' range. In other words, + when you have a 'char' value, you need to cast it before using it as + argument to a function: + + const char *s = ...; + if (isalpha ((unsigned char) *s)) ... + + but you don't need to cast it for the functions defined in this file: + + const char *s = ...; + if (c_isalpha (*s)) ... + */ + +extern bool c_isascii (int c) _GL_ATTRIBUTE_CONST; /* not locale dependent */ + +extern bool c_isalnum (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isalpha (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isblank (int c) _GL_ATTRIBUTE_CONST; +extern bool c_iscntrl (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isdigit (int c) _GL_ATTRIBUTE_CONST; +extern bool c_islower (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isgraph (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isprint (int c) _GL_ATTRIBUTE_CONST; +extern bool c_ispunct (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isspace (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isupper (int c) _GL_ATTRIBUTE_CONST; +extern bool c_isxdigit (int c) _GL_ATTRIBUTE_CONST; + +extern int c_tolower (int c) _GL_ATTRIBUTE_CONST; +extern int c_toupper (int c) _GL_ATTRIBUTE_CONST; + + +#if defined __GNUC__ && defined __OPTIMIZE__ && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS + +/* ASCII optimizations. */ + +#undef c_isascii +#define c_isascii(c) \ + ({ int __c = (c); \ + (__c >= 0x00 && __c <= 0x7f); \ + }) + +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isalnum +#define c_isalnum(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z')); \ + }) +#else +#undef c_isalnum +#define c_isalnum(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || (__c >= 'A' && __c <= 'Z') \ + || (__c >= 'a' && __c <= 'z')); \ + }) +#endif +#endif + +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isalpha +#define c_isalpha(c) \ + ({ int __c = (c); \ + ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z'); \ + }) +#else +#undef c_isalpha +#define c_isalpha(c) \ + ({ int __c = (c); \ + ((__c >= 'A' && __c <= 'Z') || (__c >= 'a' && __c <= 'z')); \ + }) +#endif +#endif + +#undef c_isblank +#define c_isblank(c) \ + ({ int __c = (c); \ + (__c == ' ' || __c == '\t'); \ + }) + +#if C_CTYPE_ASCII +#undef c_iscntrl +#define c_iscntrl(c) \ + ({ int __c = (c); \ + ((__c & ~0x1f) == 0 || __c == 0x7f); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_DIGITS +#undef c_isdigit +#define c_isdigit(c) \ + ({ int __c = (c); \ + (__c >= '0' && __c <= '9'); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_LOWERCASE +#undef c_islower +#define c_islower(c) \ + ({ int __c = (c); \ + (__c >= 'a' && __c <= 'z'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_isgraph +#define c_isgraph(c) \ + ({ int __c = (c); \ + (__c >= '!' && __c <= '~'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_isprint +#define c_isprint(c) \ + ({ int __c = (c); \ + (__c >= ' ' && __c <= '~'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_ispunct +#define c_ispunct(c) \ + ({ int _c = (c); \ + (c_isgraph (_c) && ! c_isalnum (_c)); \ + }) +#endif + +#undef c_isspace +#define c_isspace(c) \ + ({ int __c = (c); \ + (__c == ' ' || __c == '\t' \ + || __c == '\n' || __c == '\v' || __c == '\f' || __c == '\r'); \ + }) + +#if C_CTYPE_CONSECUTIVE_UPPERCASE +#undef c_isupper +#define c_isupper(c) \ + ({ int __c = (c); \ + (__c >= 'A' && __c <= 'Z'); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isxdigit +#define c_isxdigit(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'F')); \ + }) +#else +#undef c_isxdigit +#define c_isxdigit(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || (__c >= 'A' && __c <= 'F') \ + || (__c >= 'a' && __c <= 'f')); \ + }) +#endif +#endif + +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#undef c_tolower +#define c_tolower(c) \ + ({ int __c = (c); \ + (__c >= 'A' && __c <= 'Z' ? __c - 'A' + 'a' : __c); \ + }) +#undef c_toupper +#define c_toupper(c) \ + ({ int __c = (c); \ + (__c >= 'a' && __c <= 'z' ? __c - 'a' + 'A' : __c); \ + }) +#endif + +#endif /* optimizing for speed */ + + +#ifdef __cplusplus +} +#endif + +#endif /* C_CTYPE_H */ diff --git a/lib/c-strcase.h b/lib/c-strcase.h new file mode 100644 index 000000000..49e1bb03d --- /dev/null +++ b/lib/c-strcase.h @@ -0,0 +1,56 @@ +/* Case-insensitive string comparison functions in C locale. + Copyright (C) 1995-1996, 2001, 2003, 2005, 2009-2013 Free Software + Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +#ifndef C_STRCASE_H +#define C_STRCASE_H + +#include + + +/* The functions defined in this file assume the "C" locale and a character + set without diacritics (ASCII-US or EBCDIC-US or something like that). + Even if the "C" locale on a particular system is an extension of the ASCII + character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it + is ISO-8859-1), the functions in this file recognize only the ASCII + characters. More precisely, one of the string arguments must be an ASCII + string; the other one can also contain non-ASCII characters (but then + the comparison result will be nonzero). */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Compare strings S1 and S2, ignoring case, returning less than, equal to or + greater than zero if S1 is lexicographically less than, equal to or greater + than S2. */ +extern int c_strcasecmp (const char *s1, const char *s2) _GL_ATTRIBUTE_PURE; + +/* Compare no more than N characters of strings S1 and S2, ignoring case, + returning less than, equal to or greater than zero if S1 is + lexicographically less than, equal to or greater than S2. */ +extern int c_strncasecmp (const char *s1, const char *s2, size_t n) + _GL_ATTRIBUTE_PURE; + + +#ifdef __cplusplus +} +#endif + + +#endif /* C_STRCASE_H */ diff --git a/lib/c-strcasecmp.c b/lib/c-strcasecmp.c new file mode 100644 index 000000000..ef85f0e67 --- /dev/null +++ b/lib/c-strcasecmp.c @@ -0,0 +1,56 @@ +/* c-strcasecmp.c -- case insensitive string comparator in C locale + Copyright (C) 1998-1999, 2005-2006, 2009-2013 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +#include + +/* Specification. */ +#include "c-strcase.h" + +#include + +#include "c-ctype.h" + +int +c_strcasecmp (const char *s1, const char *s2) +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) + return 0; + + do + { + c1 = c_tolower (*p1); + c2 = c_tolower (*p2); + + if (c1 == '\0') + break; + + ++p1; + ++p2; + } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); +} diff --git a/lib/c-strcaseeq.h b/lib/c-strcaseeq.h new file mode 100644 index 000000000..afdea26ba --- /dev/null +++ b/lib/c-strcaseeq.h @@ -0,0 +1,184 @@ +/* Optimized case-insensitive string comparison in C locale. + Copyright (C) 2001-2002, 2007, 2009-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible . */ + +#include "c-strcase.h" +#include "c-ctype.h" + +/* STRCASEEQ allows to optimize string comparison with a small literal string. + STRCASEEQ (s, "UTF-8", 'U','T','F','-','8',0,0,0,0) + is semantically equivalent to + c_strcasecmp (s, "UTF-8") == 0 + just faster. */ + +/* Help GCC to generate good code for string comparisons with + immediate strings. */ +#if defined (__GNUC__) && defined (__OPTIMIZE__) + +/* Case insensitive comparison of ASCII characters. */ +# if C_CTYPE_ASCII +# define CASEEQ(other,upper) \ + (c_isupper (upper) ? ((other) & ~0x20) == (upper) : (other) == (upper)) +# elif C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +# define CASEEQ(other,upper) \ + (c_isupper (upper) ? (other) == (upper) || (other) == (upper) - 'A' + 'a' : (other) == (upper)) +# else +# define CASEEQ(other,upper) \ + (c_toupper (other) == (upper)) +# endif + +static inline int +strcaseeq9 (const char *s1, const char *s2) +{ + return c_strcasecmp (s1 + 9, s2 + 9) == 0; +} + +static inline int +strcaseeq8 (const char *s1, const char *s2, char s28) +{ + if (CASEEQ (s1[8], s28)) + { + if (s28 == 0) + return 1; + else + return strcaseeq9 (s1, s2); + } + else + return 0; +} + +static inline int +strcaseeq7 (const char *s1, const char *s2, char s27, char s28) +{ + if (CASEEQ (s1[7], s27)) + { + if (s27 == 0) + return 1; + else + return strcaseeq8 (s1, s2, s28); + } + else + return 0; +} + +static inline int +strcaseeq6 (const char *s1, const char *s2, char s26, char s27, char s28) +{ + if (CASEEQ (s1[6], s26)) + { + if (s26 == 0) + return 1; + else + return strcaseeq7 (s1, s2, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq5 (const char *s1, const char *s2, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[5], s25)) + { + if (s25 == 0) + return 1; + else + return strcaseeq6 (s1, s2, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq4 (const char *s1, const char *s2, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[4], s24)) + { + if (s24 == 0) + return 1; + else + return strcaseeq5 (s1, s2, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq3 (const char *s1, const char *s2, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[3], s23)) + { + if (s23 == 0) + return 1; + else + return strcaseeq4 (s1, s2, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq2 (const char *s1, const char *s2, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[2], s22)) + { + if (s22 == 0) + return 1; + else + return strcaseeq3 (s1, s2, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq1 (const char *s1, const char *s2, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[1], s21)) + { + if (s21 == 0) + return 1; + else + return strcaseeq2 (s1, s2, s22, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +strcaseeq0 (const char *s1, const char *s2, char s20, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (CASEEQ (s1[0], s20)) + { + if (s20 == 0) + return 1; + else + return strcaseeq1 (s1, s2, s21, s22, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \ + strcaseeq0 (s1, s2, s20, s21, s22, s23, s24, s25, s26, s27, s28) + +#else + +#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \ + (c_strcasecmp (s1, s2) == 0) + +#endif diff --git a/lib/c-strncasecmp.c b/lib/c-strncasecmp.c new file mode 100644 index 000000000..04404b00c --- /dev/null +++ b/lib/c-strncasecmp.c @@ -0,0 +1,56 @@ +/* c-strncasecmp.c -- case insensitive string comparator in C locale + Copyright (C) 1998-1999, 2005-2006, 2009-2013 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +#include + +/* Specification. */ +#include "c-strcase.h" + +#include + +#include "c-ctype.h" + +int +c_strncasecmp (const char *s1, const char *s2, size_t n) +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2 || n == 0) + return 0; + + do + { + c1 = c_tolower (*p1); + c2 = c_tolower (*p2); + + if (--n == 0 || c1 == '\0') + break; + + ++p1; + ++p2; + } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); +} diff --git a/lib/config.charset b/lib/config.charset new file mode 100644 index 000000000..a991419cd --- /dev/null +++ b/lib/config.charset @@ -0,0 +1,684 @@ +#! /bin/sh +# Output a system dependent table of character encoding aliases. +# +# Copyright (C) 2000-2004, 2006-2013 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, see . +# +# The table consists of lines of the form +# ALIAS CANONICAL +# +# ALIAS is the (system dependent) result of "nl_langinfo (CODESET)". +# ALIAS is compared in a case sensitive way. +# +# CANONICAL is the GNU canonical name for this character encoding. +# It must be an encoding supported by libiconv. Support by GNU libc is +# also desirable. CANONICAL is case insensitive. Usually an upper case +# MIME charset name is preferred. +# The current list of GNU canonical charset names is as follows. +# +# name MIME? used by which systems +# (darwin = Mac OS X, woe32 = native Windows) +# +# ASCII, ANSI_X3.4-1968 glibc solaris freebsd netbsd darwin cygwin +# ISO-8859-1 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin +# ISO-8859-2 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin +# ISO-8859-3 Y glibc solaris cygwin +# ISO-8859-4 Y osf solaris freebsd netbsd openbsd darwin +# ISO-8859-5 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin +# ISO-8859-6 Y glibc aix hpux solaris cygwin +# ISO-8859-7 Y glibc aix hpux irix osf solaris netbsd openbsd darwin cygwin +# ISO-8859-8 Y glibc aix hpux osf solaris cygwin +# ISO-8859-9 Y glibc aix hpux irix osf solaris darwin cygwin +# ISO-8859-13 glibc netbsd openbsd darwin cygwin +# ISO-8859-14 glibc cygwin +# ISO-8859-15 glibc aix osf solaris freebsd netbsd openbsd darwin cygwin +# KOI8-R Y glibc solaris freebsd netbsd openbsd darwin +# KOI8-U Y glibc freebsd netbsd openbsd darwin cygwin +# KOI8-T glibc +# CP437 dos +# CP775 dos +# CP850 aix osf dos +# CP852 dos +# CP855 dos +# CP856 aix +# CP857 dos +# CP861 dos +# CP862 dos +# CP864 dos +# CP865 dos +# CP866 freebsd netbsd openbsd darwin dos +# CP869 dos +# CP874 woe32 dos +# CP922 aix +# CP932 aix cygwin woe32 dos +# CP943 aix +# CP949 osf darwin woe32 dos +# CP950 woe32 dos +# CP1046 aix +# CP1124 aix +# CP1125 dos +# CP1129 aix +# CP1131 darwin +# CP1250 woe32 +# CP1251 glibc solaris netbsd openbsd darwin cygwin woe32 +# CP1252 aix woe32 +# CP1253 woe32 +# CP1254 woe32 +# CP1255 glibc woe32 +# CP1256 woe32 +# CP1257 woe32 +# GB2312 Y glibc aix hpux irix solaris freebsd netbsd darwin +# EUC-JP Y glibc aix hpux irix osf solaris freebsd netbsd darwin +# EUC-KR Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin +# EUC-TW glibc aix hpux irix osf solaris netbsd +# BIG5 Y glibc aix hpux osf solaris freebsd netbsd darwin cygwin +# BIG5-HKSCS glibc solaris darwin +# GBK glibc aix osf solaris darwin cygwin woe32 dos +# GB18030 glibc solaris netbsd darwin +# SHIFT_JIS Y hpux osf solaris freebsd netbsd darwin +# JOHAB glibc solaris woe32 +# TIS-620 glibc aix hpux osf solaris cygwin +# VISCII Y glibc +# TCVN5712-1 glibc +# ARMSCII-8 glibc darwin +# GEORGIAN-PS glibc cygwin +# PT154 glibc +# HP-ROMAN8 hpux +# HP-ARABIC8 hpux +# HP-GREEK8 hpux +# HP-HEBREW8 hpux +# HP-TURKISH8 hpux +# HP-KANA8 hpux +# DEC-KANJI osf +# DEC-HANYU osf +# UTF-8 Y glibc aix hpux osf solaris netbsd darwin cygwin +# +# Note: Names which are not marked as being a MIME name should not be used in +# Internet protocols for information interchange (mail, news, etc.). +# +# Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications +# must understand both names and treat them as equivalent. +# +# The first argument passed to this file is the canonical host specification, +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM + +host="$1" +os=`echo "$host" | sed -e 's/^[^-]*-[^-]*-\(.*\)$/\1/'` +echo "# This file contains a table of character encoding aliases," +echo "# suitable for operating system '${os}'." +echo "# It was automatically generated from config.charset." +# List of references, updated during installation: +echo "# Packages using this file: " +case "$os" in + linux-gnulibc1*) + # Linux libc5 doesn't have nl_langinfo(CODESET); therefore + # localcharset.c falls back to using the full locale name + # from the environment variables. + echo "C ASCII" + echo "POSIX ASCII" + for l in af af_ZA ca ca_ES da da_DK de de_AT de_BE de_CH de_DE de_LU \ + en en_AU en_BW en_CA en_DK en_GB en_IE en_NZ en_US en_ZA \ + en_ZW es es_AR es_BO es_CL es_CO es_DO es_EC es_ES es_GT \ + es_HN es_MX es_PA es_PE es_PY es_SV es_US es_UY es_VE et \ + et_EE eu eu_ES fi fi_FI fo fo_FO fr fr_BE fr_CA fr_CH fr_FR \ + fr_LU ga ga_IE gl gl_ES id id_ID in in_ID is is_IS it it_CH \ + it_IT kl kl_GL nl nl_BE nl_NL no no_NO pt pt_BR pt_PT sv \ + sv_FI sv_SE; do + echo "$l ISO-8859-1" + echo "$l.iso-8859-1 ISO-8859-1" + echo "$l.iso-8859-15 ISO-8859-15" + echo "$l.iso-8859-15@euro ISO-8859-15" + echo "$l@euro ISO-8859-15" + echo "$l.cp-437 CP437" + echo "$l.cp-850 CP850" + echo "$l.cp-1252 CP1252" + echo "$l.cp-1252@euro CP1252" + #echo "$l.atari-st ATARI-ST" # not a commonly used encoding + echo "$l.utf-8 UTF-8" + echo "$l.utf-8@euro UTF-8" + done + for l in cs cs_CZ hr hr_HR hu hu_HU pl pl_PL ro ro_RO sk sk_SK sl \ + sl_SI sr sr_CS sr_YU; do + echo "$l ISO-8859-2" + echo "$l.iso-8859-2 ISO-8859-2" + echo "$l.cp-852 CP852" + echo "$l.cp-1250 CP1250" + echo "$l.utf-8 UTF-8" + done + for l in mk mk_MK ru ru_RU; do + echo "$l ISO-8859-5" + echo "$l.iso-8859-5 ISO-8859-5" + echo "$l.koi8-r KOI8-R" + echo "$l.cp-866 CP866" + echo "$l.cp-1251 CP1251" + echo "$l.utf-8 UTF-8" + done + for l in ar ar_SA; do + echo "$l ISO-8859-6" + echo "$l.iso-8859-6 ISO-8859-6" + echo "$l.cp-864 CP864" + #echo "$l.cp-868 CP868" # not a commonly used encoding + echo "$l.cp-1256 CP1256" + echo "$l.utf-8 UTF-8" + done + for l in el el_GR gr gr_GR; do + echo "$l ISO-8859-7" + echo "$l.iso-8859-7 ISO-8859-7" + echo "$l.cp-869 CP869" + echo "$l.cp-1253 CP1253" + echo "$l.cp-1253@euro CP1253" + echo "$l.utf-8 UTF-8" + echo "$l.utf-8@euro UTF-8" + done + for l in he he_IL iw iw_IL; do + echo "$l ISO-8859-8" + echo "$l.iso-8859-8 ISO-8859-8" + echo "$l.cp-862 CP862" + echo "$l.cp-1255 CP1255" + echo "$l.utf-8 UTF-8" + done + for l in tr tr_TR; do + echo "$l ISO-8859-9" + echo "$l.iso-8859-9 ISO-8859-9" + echo "$l.cp-857 CP857" + echo "$l.cp-1254 CP1254" + echo "$l.utf-8 UTF-8" + done + for l in lt lt_LT lv lv_LV; do + #echo "$l BALTIC" # not a commonly used encoding, wrong encoding name + echo "$l ISO-8859-13" + done + for l in ru_UA uk uk_UA; do + echo "$l KOI8-U" + done + for l in zh zh_CN; do + #echo "$l GB_2312-80" # not a commonly used encoding, wrong encoding name + echo "$l GB2312" + done + for l in ja ja_JP ja_JP.EUC; do + echo "$l EUC-JP" + done + for l in ko ko_KR; do + echo "$l EUC-KR" + done + for l in th th_TH; do + echo "$l TIS-620" + done + for l in fa fa_IR; do + #echo "$l ISIRI-3342" # a broken encoding + echo "$l.utf-8 UTF-8" + done + ;; + linux* | *-gnu*) + # With glibc-2.1 or newer, we don't need any canonicalization, + # because glibc has iconv and both glibc and libiconv support all + # GNU canonical names directly. Therefore, the Makefile does not + # need to install the alias file at all. + # The following applies only to glibc-2.0.x and older libcs. + echo "ISO_646.IRV:1983 ASCII" + ;; + aix*) + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-6 ISO-8859-6" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-8 ISO-8859-8" + echo "ISO8859-9 ISO-8859-9" + echo "ISO8859-15 ISO-8859-15" + echo "IBM-850 CP850" + echo "IBM-856 CP856" + echo "IBM-921 ISO-8859-13" + echo "IBM-922 CP922" + echo "IBM-932 CP932" + echo "IBM-943 CP943" + echo "IBM-1046 CP1046" + echo "IBM-1124 CP1124" + echo "IBM-1129 CP1129" + echo "IBM-1252 CP1252" + echo "IBM-eucCN GB2312" + echo "IBM-eucJP EUC-JP" + echo "IBM-eucKR EUC-KR" + echo "IBM-eucTW EUC-TW" + echo "big5 BIG5" + echo "GBK GBK" + echo "TIS-620 TIS-620" + echo "UTF-8 UTF-8" + ;; + hpux*) + echo "iso88591 ISO-8859-1" + echo "iso88592 ISO-8859-2" + echo "iso88595 ISO-8859-5" + echo "iso88596 ISO-8859-6" + echo "iso88597 ISO-8859-7" + echo "iso88598 ISO-8859-8" + echo "iso88599 ISO-8859-9" + echo "iso885915 ISO-8859-15" + echo "roman8 HP-ROMAN8" + echo "arabic8 HP-ARABIC8" + echo "greek8 HP-GREEK8" + echo "hebrew8 HP-HEBREW8" + echo "turkish8 HP-TURKISH8" + echo "kana8 HP-KANA8" + echo "tis620 TIS-620" + echo "big5 BIG5" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "eucTW EUC-TW" + echo "hp15CN GB2312" + #echo "ccdc ?" # what is this? + echo "SJIS SHIFT_JIS" + echo "utf8 UTF-8" + ;; + irix*) + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-9 ISO-8859-9" + echo "eucCN GB2312" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "eucTW EUC-TW" + ;; + osf*) + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-4 ISO-8859-4" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-8 ISO-8859-8" + echo "ISO8859-9 ISO-8859-9" + echo "ISO8859-15 ISO-8859-15" + echo "cp850 CP850" + echo "big5 BIG5" + echo "dechanyu DEC-HANYU" + echo "dechanzi GB2312" + echo "deckanji DEC-KANJI" + echo "deckorean EUC-KR" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "eucTW EUC-TW" + echo "GBK GBK" + echo "KSC5601 CP949" + echo "sdeckanji EUC-JP" + echo "SJIS SHIFT_JIS" + echo "TACTIS TIS-620" + echo "UTF-8 UTF-8" + ;; + solaris*) + echo "646 ASCII" + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-3 ISO-8859-3" + echo "ISO8859-4 ISO-8859-4" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-6 ISO-8859-6" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-8 ISO-8859-8" + echo "ISO8859-9 ISO-8859-9" + echo "ISO8859-15 ISO-8859-15" + echo "koi8-r KOI8-R" + echo "ansi-1251 CP1251" + echo "BIG5 BIG5" + echo "Big5-HKSCS BIG5-HKSCS" + echo "gb2312 GB2312" + echo "GBK GBK" + echo "GB18030 GB18030" + echo "cns11643 EUC-TW" + echo "5601 EUC-KR" + echo "ko_KR.johap92 JOHAB" + echo "eucJP EUC-JP" + echo "PCK SHIFT_JIS" + echo "TIS620.2533 TIS-620" + #echo "sun_eu_greek ?" # what is this? + echo "UTF-8 UTF-8" + ;; + freebsd* | os2*) + # FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore + # localcharset.c falls back to using the full locale name + # from the environment variables. + # Likewise for OS/2. OS/2 has XFree86 just like FreeBSD. Just + # reuse FreeBSD's locale data for OS/2. + echo "C ASCII" + echo "US-ASCII ASCII" + for l in la_LN lt_LN; do + echo "$l.ASCII ASCII" + done + for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \ + fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT la_LN \ + lt_LN nl_BE nl_NL no_NO pt_PT sv_SE; do + echo "$l.ISO_8859-1 ISO-8859-1" + echo "$l.DIS_8859-15 ISO-8859-15" + done + for l in cs_CZ hr_HR hu_HU la_LN lt_LN pl_PL sl_SI; do + echo "$l.ISO_8859-2 ISO-8859-2" + done + for l in la_LN lt_LT; do + echo "$l.ISO_8859-4 ISO-8859-4" + done + for l in ru_RU ru_SU; do + echo "$l.KOI8-R KOI8-R" + echo "$l.ISO_8859-5 ISO-8859-5" + echo "$l.CP866 CP866" + done + echo "uk_UA.KOI8-U KOI8-U" + echo "zh_TW.BIG5 BIG5" + echo "zh_TW.Big5 BIG5" + echo "zh_CN.EUC GB2312" + echo "ja_JP.EUC EUC-JP" + echo "ja_JP.SJIS SHIFT_JIS" + echo "ja_JP.Shift_JIS SHIFT_JIS" + echo "ko_KR.EUC EUC-KR" + ;; + netbsd*) + echo "646 ASCII" + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-4 ISO-8859-4" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-13 ISO-8859-13" + echo "ISO8859-15 ISO-8859-15" + echo "eucCN GB2312" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "eucTW EUC-TW" + echo "BIG5 BIG5" + echo "SJIS SHIFT_JIS" + ;; + openbsd*) + echo "646 ASCII" + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-4 ISO-8859-4" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-13 ISO-8859-13" + echo "ISO8859-15 ISO-8859-15" + ;; + darwin[56]*) + # Darwin 6.8 doesn't have nl_langinfo(CODESET); therefore + # localcharset.c falls back to using the full locale name + # from the environment variables. + echo "C ASCII" + for l in en_AU en_CA en_GB en_US la_LN; do + echo "$l.US-ASCII ASCII" + done + for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \ + fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT nl_BE \ + nl_NL no_NO pt_PT sv_SE; do + echo "$l ISO-8859-1" + echo "$l.ISO8859-1 ISO-8859-1" + echo "$l.ISO8859-15 ISO-8859-15" + done + for l in la_LN; do + echo "$l.ISO8859-1 ISO-8859-1" + echo "$l.ISO8859-15 ISO-8859-15" + done + for l in cs_CZ hr_HR hu_HU la_LN pl_PL sl_SI; do + echo "$l.ISO8859-2 ISO-8859-2" + done + for l in la_LN lt_LT; do + echo "$l.ISO8859-4 ISO-8859-4" + done + for l in ru_RU; do + echo "$l.KOI8-R KOI8-R" + echo "$l.ISO8859-5 ISO-8859-5" + echo "$l.CP866 CP866" + done + for l in bg_BG; do + echo "$l.CP1251 CP1251" + done + echo "uk_UA.KOI8-U KOI8-U" + echo "zh_TW.BIG5 BIG5" + echo "zh_TW.Big5 BIG5" + echo "zh_CN.EUC GB2312" + echo "ja_JP.EUC EUC-JP" + echo "ja_JP.SJIS SHIFT_JIS" + echo "ko_KR.EUC EUC-KR" + ;; + darwin*) + # Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is + # useless: + # - It returns the empty string when LANG is set to a locale of the + # form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8 + # LC_CTYPE file. + # - The environment variables LANG, LC_CTYPE, LC_ALL are not set by + # the system; nl_langinfo(CODESET) returns "US-ASCII" in this case. + # - The documentation says: + # "... all code that calls BSD system routines should ensure + # that the const *char parameters of these routines are in UTF-8 + # encoding. All BSD system functions expect their string + # parameters to be in UTF-8 encoding and nothing else." + # It also says + # "An additional caveat is that string parameters for files, + # paths, and other file-system entities must be in canonical + # UTF-8. In a canonical UTF-8 Unicode string, all decomposable + # characters are decomposed ..." + # but this is not true: You can pass non-decomposed UTF-8 strings + # to file system functions, and it is the OS which will convert + # them to decomposed UTF-8 before accessing the file system. + # - The Apple Terminal application displays UTF-8 by default. + # - However, other applications are free to use different encodings: + # - xterm uses ISO-8859-1 by default. + # - TextEdit uses MacRoman by default. + # We prefer UTF-8 over decomposed UTF-8-MAC because one should + # minimize the use of decomposed Unicode. Unfortunately, through the + # Darwin file system, decomposed UTF-8 strings are leaked into user + # space nevertheless. + # Then there are also the locales with encodings other than US-ASCII + # and UTF-8. These locales can be occasionally useful to users (e.g. + # when grepping through ISO-8859-1 encoded text files), when all their + # file names are in US-ASCII. + echo "ISO8859-1 ISO-8859-1" + echo "ISO8859-2 ISO-8859-2" + echo "ISO8859-4 ISO-8859-4" + echo "ISO8859-5 ISO-8859-5" + echo "ISO8859-7 ISO-8859-7" + echo "ISO8859-9 ISO-8859-9" + echo "ISO8859-13 ISO-8859-13" + echo "ISO8859-15 ISO-8859-15" + echo "KOI8-R KOI8-R" + echo "KOI8-U KOI8-U" + echo "CP866 CP866" + echo "CP949 CP949" + echo "CP1131 CP1131" + echo "CP1251 CP1251" + echo "eucCN GB2312" + echo "GB2312 GB2312" + echo "eucJP EUC-JP" + echo "eucKR EUC-KR" + echo "Big5 BIG5" + echo "Big5HKSCS BIG5-HKSCS" + echo "GBK GBK" + echo "GB18030 GB18030" + echo "SJIS SHIFT_JIS" + echo "ARMSCII-8 ARMSCII-8" + echo "PT154 PT154" + #echo "ISCII-DEV ?" + echo "* UTF-8" + ;; + beos* | haiku*) + # BeOS and Haiku have a single locale, and it has UTF-8 encoding. + echo "* UTF-8" + ;; + msdosdjgpp*) + # DJGPP 2.03 doesn't have nl_langinfo(CODESET); therefore + # localcharset.c falls back to using the full locale name + # from the environment variables. + echo "#" + echo "# The encodings given here may not all be correct." + echo "# If you find that the encoding given for your language and" + echo "# country is not the one your DOS machine actually uses, just" + echo "# correct it in this file, and send a mail to" + echo "# Juan Manuel Guerrero " + echo "# and Bruno Haible ." + echo "#" + echo "C ASCII" + # ISO-8859-1 languages + echo "ca CP850" + echo "ca_ES CP850" + echo "da CP865" # not CP850 ?? + echo "da_DK CP865" # not CP850 ?? + echo "de CP850" + echo "de_AT CP850" + echo "de_CH CP850" + echo "de_DE CP850" + echo "en CP850" + echo "en_AU CP850" # not CP437 ?? + echo "en_CA CP850" + echo "en_GB CP850" + echo "en_NZ CP437" + echo "en_US CP437" + echo "en_ZA CP850" # not CP437 ?? + echo "es CP850" + echo "es_AR CP850" + echo "es_BO CP850" + echo "es_CL CP850" + echo "es_CO CP850" + echo "es_CR CP850" + echo "es_CU CP850" + echo "es_DO CP850" + echo "es_EC CP850" + echo "es_ES CP850" + echo "es_GT CP850" + echo "es_HN CP850" + echo "es_MX CP850" + echo "es_NI CP850" + echo "es_PA CP850" + echo "es_PY CP850" + echo "es_PE CP850" + echo "es_SV CP850" + echo "es_UY CP850" + echo "es_VE CP850" + echo "et CP850" + echo "et_EE CP850" + echo "eu CP850" + echo "eu_ES CP850" + echo "fi CP850" + echo "fi_FI CP850" + echo "fr CP850" + echo "fr_BE CP850" + echo "fr_CA CP850" + echo "fr_CH CP850" + echo "fr_FR CP850" + echo "ga CP850" + echo "ga_IE CP850" + echo "gd CP850" + echo "gd_GB CP850" + echo "gl CP850" + echo "gl_ES CP850" + echo "id CP850" # not CP437 ?? + echo "id_ID CP850" # not CP437 ?? + echo "is CP861" # not CP850 ?? + echo "is_IS CP861" # not CP850 ?? + echo "it CP850" + echo "it_CH CP850" + echo "it_IT CP850" + echo "lt CP775" + echo "lt_LT CP775" + echo "lv CP775" + echo "lv_LV CP775" + echo "nb CP865" # not CP850 ?? + echo "nb_NO CP865" # not CP850 ?? + echo "nl CP850" + echo "nl_BE CP850" + echo "nl_NL CP850" + echo "nn CP865" # not CP850 ?? + echo "nn_NO CP865" # not CP850 ?? + echo "no CP865" # not CP850 ?? + echo "no_NO CP865" # not CP850 ?? + echo "pt CP850" + echo "pt_BR CP850" + echo "pt_PT CP850" + echo "sv CP850" + echo "sv_SE CP850" + # ISO-8859-2 languages + echo "cs CP852" + echo "cs_CZ CP852" + echo "hr CP852" + echo "hr_HR CP852" + echo "hu CP852" + echo "hu_HU CP852" + echo "pl CP852" + echo "pl_PL CP852" + echo "ro CP852" + echo "ro_RO CP852" + echo "sk CP852" + echo "sk_SK CP852" + echo "sl CP852" + echo "sl_SI CP852" + echo "sq CP852" + echo "sq_AL CP852" + echo "sr CP852" # CP852 or CP866 or CP855 ?? + echo "sr_CS CP852" # CP852 or CP866 or CP855 ?? + echo "sr_YU CP852" # CP852 or CP866 or CP855 ?? + # ISO-8859-3 languages + echo "mt CP850" + echo "mt_MT CP850" + # ISO-8859-5 languages + echo "be CP866" + echo "be_BE CP866" + echo "bg CP866" # not CP855 ?? + echo "bg_BG CP866" # not CP855 ?? + echo "mk CP866" # not CP855 ?? + echo "mk_MK CP866" # not CP855 ?? + echo "ru CP866" + echo "ru_RU CP866" + echo "uk CP1125" + echo "uk_UA CP1125" + # ISO-8859-6 languages + echo "ar CP864" + echo "ar_AE CP864" + echo "ar_DZ CP864" + echo "ar_EG CP864" + echo "ar_IQ CP864" + echo "ar_IR CP864" + echo "ar_JO CP864" + echo "ar_KW CP864" + echo "ar_MA CP864" + echo "ar_OM CP864" + echo "ar_QA CP864" + echo "ar_SA CP864" + echo "ar_SY CP864" + # ISO-8859-7 languages + echo "el CP869" + echo "el_GR CP869" + # ISO-8859-8 languages + echo "he CP862" + echo "he_IL CP862" + # ISO-8859-9 languages + echo "tr CP857" + echo "tr_TR CP857" + # Japanese + echo "ja CP932" + echo "ja_JP CP932" + # Chinese + echo "zh_CN GBK" + echo "zh_TW CP950" # not CP938 ?? + # Korean + echo "kr CP949" # not CP934 ?? + echo "kr_KR CP949" # not CP934 ?? + # Thai + echo "th CP874" + echo "th_TH CP874" + # Other + echo "eo CP850" + echo "eo_EO CP850" + ;; +esac diff --git a/lib/exitfail.c b/lib/exitfail.c new file mode 100644 index 000000000..b0b4ebe45 --- /dev/null +++ b/lib/exitfail.c @@ -0,0 +1,24 @@ +/* Failure exit status + + Copyright (C) 2002-2003, 2005-2007, 2009-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "exitfail.h" + +#include + +int volatile exit_failure = EXIT_FAILURE; diff --git a/lib/exitfail.h b/lib/exitfail.h new file mode 100644 index 000000000..e54333bdd --- /dev/null +++ b/lib/exitfail.h @@ -0,0 +1,18 @@ +/* Failure exit status + + Copyright (C) 2002, 2009-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +extern int volatile exit_failure; diff --git a/lib/isatty.c b/lib/isatty.c new file mode 100644 index 000000000..36fbc36e2 --- /dev/null +++ b/lib/isatty.c @@ -0,0 +1,81 @@ +/* isatty() replacement. + Copyright (C) 2012-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include + +/* This replacement is enabled on native Windows. */ + +#include + +/* Get declarations of the Win32 API functions. */ +#define WIN32_LEAN_AND_MEAN +#include + +#include "msvc-inval.h" + +/* Get _get_osfhandle(). */ +#include "msvc-nothrow.h" + +/* Optimized test whether a HANDLE refers to a console. + See . */ +#define IsConsoleHandle(h) (((intptr_t) (h) & 3) == 3) + +#if HAVE_MSVC_INVALID_PARAMETER_HANDLER +static int +_isatty_nothrow (int fd) +{ + int result; + + TRY_MSVC_INVAL + { + result = _isatty (fd); + } + CATCH_MSVC_INVAL + { + result = 0; + } + DONE_MSVC_INVAL; + + return result; +} +#else +# define _isatty_nothrow _isatty +#endif + +/* Determine whether FD refers to a console device. Return 1 if yes. + Return 0 and set errno if no. (ptsname_r relies on the errno value.) */ +int +isatty (int fd) +{ + HANDLE h = (HANDLE) _get_osfhandle (fd); + if (h == INVALID_HANDLE_VALUE) + { + errno = EBADF; + return 0; + } + /* _isatty (fd) tests whether GetFileType of the handle is FILE_TYPE_CHAR. + But it does not set errno when it returns 0. */ + if (_isatty_nothrow (fd)) + { + if (IsConsoleHandle (h)) + return 1; + } + errno = ENOTTY; + return 0; +} diff --git a/lib/localcharset.c b/lib/localcharset.c new file mode 100644 index 000000000..953cc1e70 --- /dev/null +++ b/lib/localcharset.c @@ -0,0 +1,553 @@ +/* Determine a canonical name for the current locale's character encoding. + + Copyright (C) 2000-2006, 2008-2013 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see . */ + +/* Written by Bruno Haible . */ + +#include + +/* Specification. */ +#include "localcharset.h" + +#include +#include +#include +#include +#include + +#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET +# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */ +#endif + +#if defined _WIN32 || defined __WIN32__ +# define WINDOWS_NATIVE +#endif + +#if defined __EMX__ +/* Assume EMX program runs on OS/2, even if compiled under DOS. */ +# ifndef OS2 +# define OS2 +# endif +#endif + +#if !defined WINDOWS_NATIVE +# include +# if HAVE_LANGINFO_CODESET +# include +# else +# if 0 /* see comment below */ +# include +# endif +# endif +# ifdef __CYGWIN__ +# define WIN32_LEAN_AND_MEAN +# include +# endif +#elif defined WINDOWS_NATIVE +# define WIN32_LEAN_AND_MEAN +# include +#endif +#if defined OS2 +# define INCL_DOS +# include +#endif + +#if ENABLE_RELOCATABLE +# include "relocatable.h" +#else +# define relocate(pathname) (pathname) +#endif + +/* Get LIBDIR. */ +#ifndef LIBDIR +# include "configmake.h" +#endif + +/* Define O_NOFOLLOW to 0 on platforms where it does not exist. */ +#ifndef O_NOFOLLOW +# define O_NOFOLLOW 0 +#endif + +#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ + /* Native Windows, Cygwin, OS/2, DOS */ +# define ISSLASH(C) ((C) == '/' || (C) == '\\') +#endif + +#ifndef DIRECTORY_SEPARATOR +# define DIRECTORY_SEPARATOR '/' +#endif + +#ifndef ISSLASH +# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR) +#endif + +#if HAVE_DECL_GETC_UNLOCKED +# undef getc +# define getc getc_unlocked +#endif + +/* The following static variable is declared 'volatile' to avoid a + possible multithread problem in the function get_charset_aliases. If we + are running in a threaded environment, and if two threads initialize + 'charset_aliases' simultaneously, both will produce the same value, + and everything will be ok if the two assignments to 'charset_aliases' + are atomic. But I don't know what will happen if the two assignments mix. */ +#if __STDC__ != 1 +# define volatile /* empty */ +#endif +/* Pointer to the contents of the charset.alias file, if it has already been + read, else NULL. Its format is: + ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0' */ +static const char * volatile charset_aliases; + +/* Return a pointer to the contents of the charset.alias file. */ +static const char * +get_charset_aliases (void) +{ + const char *cp; + + cp = charset_aliases; + if (cp == NULL) + { +#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__) + const char *dir; + const char *base = "charset.alias"; + char *file_name; + + /* Make it possible to override the charset.alias location. This is + necessary for running the testsuite before "make install". */ + dir = getenv ("CHARSETALIASDIR"); + if (dir == NULL || dir[0] == '\0') + dir = relocate (LIBDIR); + + /* Concatenate dir and base into freshly allocated file_name. */ + { + size_t dir_len = strlen (dir); + size_t base_len = strlen (base); + int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); + file_name = (char *) malloc (dir_len + add_slash + base_len + 1); + if (file_name != NULL) + { + memcpy (file_name, dir, dir_len); + if (add_slash) + file_name[dir_len] = DIRECTORY_SEPARATOR; + memcpy (file_name + dir_len + add_slash, base, base_len + 1); + } + } + + if (file_name == NULL) + /* Out of memory. Treat the file as empty. */ + cp = ""; + else + { + int fd; + + /* Open the file. Reject symbolic links on platforms that support + O_NOFOLLOW. This is a security feature. Without it, an attacker + could retrieve parts of the contents (namely, the tail of the + first line that starts with "* ") of an arbitrary file by placing + a symbolic link to that file under the name "charset.alias" in + some writable directory and defining the environment variable + CHARSETALIASDIR to point to that directory. */ + fd = open (file_name, + O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0)); + if (fd < 0) + /* File not found. Treat it as empty. */ + cp = ""; + else + { + FILE *fp; + + fp = fdopen (fd, "r"); + if (fp == NULL) + { + /* Out of memory. Treat the file as empty. */ + close (fd); + cp = ""; + } + else + { + /* Parse the file's contents. */ + char *res_ptr = NULL; + size_t res_size = 0; + + for (;;) + { + int c; + char buf1[50+1]; + char buf2[50+1]; + size_t l1, l2; + char *old_res_ptr; + + c = getc (fp); + if (c == EOF) + break; + if (c == '\n' || c == ' ' || c == '\t') + continue; + if (c == '#') + { + /* Skip comment, to end of line. */ + do + c = getc (fp); + while (!(c == EOF || c == '\n')); + if (c == EOF) + break; + continue; + } + ungetc (c, fp); + if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) + break; + l1 = strlen (buf1); + l2 = strlen (buf2); + old_res_ptr = res_ptr; + if (res_size == 0) + { + res_size = l1 + 1 + l2 + 1; + res_ptr = (char *) malloc (res_size + 1); + } + else + { + res_size += l1 + 1 + l2 + 1; + res_ptr = (char *) realloc (res_ptr, res_size + 1); + } + if (res_ptr == NULL) + { + /* Out of memory. */ + res_size = 0; + free (old_res_ptr); + break; + } + strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); + strcpy (res_ptr + res_size - (l2 + 1), buf2); + } + fclose (fp); + if (res_size == 0) + cp = ""; + else + { + *(res_ptr + res_size) = '\0'; + cp = res_ptr; + } + } + } + + free (file_name); + } + +#else + +# if defined DARWIN7 + /* To avoid the trouble of installing a file that is shared by many + GNU packages -- many packaging systems have problems with this --, + simply inline the aliases here. */ + cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" + "ISO8859-2" "\0" "ISO-8859-2" "\0" + "ISO8859-4" "\0" "ISO-8859-4" "\0" + "ISO8859-5" "\0" "ISO-8859-5" "\0" + "ISO8859-7" "\0" "ISO-8859-7" "\0" + "ISO8859-9" "\0" "ISO-8859-9" "\0" + "ISO8859-13" "\0" "ISO-8859-13" "\0" + "ISO8859-15" "\0" "ISO-8859-15" "\0" + "KOI8-R" "\0" "KOI8-R" "\0" + "KOI8-U" "\0" "KOI8-U" "\0" + "CP866" "\0" "CP866" "\0" + "CP949" "\0" "CP949" "\0" + "CP1131" "\0" "CP1131" "\0" + "CP1251" "\0" "CP1251" "\0" + "eucCN" "\0" "GB2312" "\0" + "GB2312" "\0" "GB2312" "\0" + "eucJP" "\0" "EUC-JP" "\0" + "eucKR" "\0" "EUC-KR" "\0" + "Big5" "\0" "BIG5" "\0" + "Big5HKSCS" "\0" "BIG5-HKSCS" "\0" + "GBK" "\0" "GBK" "\0" + "GB18030" "\0" "GB18030" "\0" + "SJIS" "\0" "SHIFT_JIS" "\0" + "ARMSCII-8" "\0" "ARMSCII-8" "\0" + "PT154" "\0" "PT154" "\0" + /*"ISCII-DEV" "\0" "?" "\0"*/ + "*" "\0" "UTF-8" "\0"; +# endif + +# if defined VMS + /* To avoid the troubles of an extra file charset.alias_vms in the + sources of many GNU packages, simply inline the aliases here. */ + /* The list of encodings is taken from the OpenVMS 7.3-1 documentation + "Compaq C Run-Time Library Reference Manual for OpenVMS systems" + section 10.7 "Handling Different Character Sets". */ + cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" + "ISO8859-2" "\0" "ISO-8859-2" "\0" + "ISO8859-5" "\0" "ISO-8859-5" "\0" + "ISO8859-7" "\0" "ISO-8859-7" "\0" + "ISO8859-8" "\0" "ISO-8859-8" "\0" + "ISO8859-9" "\0" "ISO-8859-9" "\0" + /* Japanese */ + "eucJP" "\0" "EUC-JP" "\0" + "SJIS" "\0" "SHIFT_JIS" "\0" + "DECKANJI" "\0" "DEC-KANJI" "\0" + "SDECKANJI" "\0" "EUC-JP" "\0" + /* Chinese */ + "eucTW" "\0" "EUC-TW" "\0" + "DECHANYU" "\0" "DEC-HANYU" "\0" + "DECHANZI" "\0" "GB2312" "\0" + /* Korean */ + "DECKOREAN" "\0" "EUC-KR" "\0"; +# endif + +# if defined WINDOWS_NATIVE || defined __CYGWIN__ + /* To avoid the troubles of installing a separate file in the same + directory as the DLL and of retrieving the DLL's directory at + runtime, simply inline the aliases here. */ + + cp = "CP936" "\0" "GBK" "\0" + "CP1361" "\0" "JOHAB" "\0" + "CP20127" "\0" "ASCII" "\0" + "CP20866" "\0" "KOI8-R" "\0" + "CP20936" "\0" "GB2312" "\0" + "CP21866" "\0" "KOI8-RU" "\0" + "CP28591" "\0" "ISO-8859-1" "\0" + "CP28592" "\0" "ISO-8859-2" "\0" + "CP28593" "\0" "ISO-8859-3" "\0" + "CP28594" "\0" "ISO-8859-4" "\0" + "CP28595" "\0" "ISO-8859-5" "\0" + "CP28596" "\0" "ISO-8859-6" "\0" + "CP28597" "\0" "ISO-8859-7" "\0" + "CP28598" "\0" "ISO-8859-8" "\0" + "CP28599" "\0" "ISO-8859-9" "\0" + "CP28605" "\0" "ISO-8859-15" "\0" + "CP38598" "\0" "ISO-8859-8" "\0" + "CP51932" "\0" "EUC-JP" "\0" + "CP51936" "\0" "GB2312" "\0" + "CP51949" "\0" "EUC-KR" "\0" + "CP51950" "\0" "EUC-TW" "\0" + "CP54936" "\0" "GB18030" "\0" + "CP65001" "\0" "UTF-8" "\0"; +# endif +#endif + + charset_aliases = cp; + } + + return cp; +} + +/* Determine the current locale's character encoding, and canonicalize it + into one of the canonical names listed in config.charset. + The result must not be freed; it is statically allocated. + If the canonical name cannot be determined, the result is a non-canonical + name. */ + +#ifdef STATIC +STATIC +#endif +const char * +locale_charset (void) +{ + const char *codeset; + const char *aliases; + +#if !(defined WINDOWS_NATIVE || defined OS2) + +# if HAVE_LANGINFO_CODESET + + /* Most systems support nl_langinfo (CODESET) nowadays. */ + codeset = nl_langinfo (CODESET); + +# ifdef __CYGWIN__ + /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always + returns "US-ASCII". Return the suffix of the locale name from the + environment variables (if present) or the codepage as a number. */ + if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0) + { + const char *locale; + static char buf[2 + 10 + 1]; + + locale = getenv ("LC_ALL"); + if (locale == NULL || locale[0] == '\0') + { + locale = getenv ("LC_CTYPE"); + if (locale == NULL || locale[0] == '\0') + locale = getenv ("LANG"); + } + if (locale != NULL && locale[0] != '\0') + { + /* If the locale name contains an encoding after the dot, return + it. */ + const char *dot = strchr (locale, '.'); + + if (dot != NULL) + { + const char *modifier; + + dot++; + /* Look for the possible @... trailer and remove it, if any. */ + modifier = strchr (dot, '@'); + if (modifier == NULL) + return dot; + if (modifier - dot < sizeof (buf)) + { + memcpy (buf, dot, modifier - dot); + buf [modifier - dot] = '\0'; + return buf; + } + } + } + + /* The Windows API has a function returning the locale's codepage as a + number: GetACP(). This encoding is used by Cygwin, unless the user + has set the environment variable CYGWIN=codepage:oem (which very few + people do). + Output directed to console windows needs to be converted (to + GetOEMCP() if the console is using a raster font, or to + GetConsoleOutputCP() if it is using a TrueType font). Cygwin does + this conversion transparently (see winsup/cygwin/fhandler_console.cc), + converting to GetConsoleOutputCP(). This leads to correct results, + except when SetConsoleOutputCP has been called and a raster font is + in use. */ + sprintf (buf, "CP%u", GetACP ()); + codeset = buf; + } +# endif + +# else + + /* On old systems which lack it, use setlocale or getenv. */ + const char *locale = NULL; + + /* But most old systems don't have a complete set of locales. Some + (like SunOS 4 or DJGPP) have only the C locale. Therefore we don't + use setlocale here; it would return "C" when it doesn't support the + locale name the user has set. */ +# if 0 + locale = setlocale (LC_CTYPE, NULL); +# endif + if (locale == NULL || locale[0] == '\0') + { + locale = getenv ("LC_ALL"); + if (locale == NULL || locale[0] == '\0') + { + locale = getenv ("LC_CTYPE"); + if (locale == NULL || locale[0] == '\0') + locale = getenv ("LANG"); + } + } + + /* On some old systems, one used to set locale = "iso8859_1". On others, + you set it to "language_COUNTRY.charset". In any case, we resolve it + through the charset.alias file. */ + codeset = locale; + +# endif + +#elif defined WINDOWS_NATIVE + + static char buf[2 + 10 + 1]; + + /* The Windows API has a function returning the locale's codepage as a + number: GetACP(). + When the output goes to a console window, it needs to be provided in + GetOEMCP() encoding if the console is using a raster font, or in + GetConsoleOutputCP() encoding if it is using a TrueType font. + But in GUI programs and for output sent to files and pipes, GetACP() + encoding is the best bet. */ + sprintf (buf, "CP%u", GetACP ()); + codeset = buf; + +#elif defined OS2 + + const char *locale; + static char buf[2 + 10 + 1]; + ULONG cp[3]; + ULONG cplen; + + /* Allow user to override the codeset, as set in the operating system, + with standard language environment variables. */ + locale = getenv ("LC_ALL"); + if (locale == NULL || locale[0] == '\0') + { + locale = getenv ("LC_CTYPE"); + if (locale == NULL || locale[0] == '\0') + locale = getenv ("LANG"); + } + if (locale != NULL && locale[0] != '\0') + { + /* If the locale name contains an encoding after the dot, return it. */ + const char *dot = strchr (locale, '.'); + + if (dot != NULL) + { + const char *modifier; + + dot++; + /* Look for the possible @... trailer and remove it, if any. */ + modifier = strchr (dot, '@'); + if (modifier == NULL) + return dot; + if (modifier - dot < sizeof (buf)) + { + memcpy (buf, dot, modifier - dot); + buf [modifier - dot] = '\0'; + return buf; + } + } + + /* Resolve through the charset.alias file. */ + codeset = locale; + } + else + { + /* OS/2 has a function returning the locale's codepage as a number. */ + if (DosQueryCp (sizeof (cp), cp, &cplen)) + codeset = ""; + else + { + sprintf (buf, "CP%u", cp[0]); + codeset = buf; + } + } + +#endif + + if (codeset == NULL) + /* The canonical name cannot be determined. */ + codeset = ""; + + /* Resolve alias. */ + for (aliases = get_charset_aliases (); + *aliases != '\0'; + aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) + if (strcmp (codeset, aliases) == 0 + || (aliases[0] == '*' && aliases[1] == '\0')) + { + codeset = aliases + strlen (aliases) + 1; + break; + } + + /* Don't return an empty string. GNU libc and GNU libiconv interpret + the empty string as denoting "the locale's character encoding", + thus GNU libiconv would call this function a second time. */ + if (codeset[0] == '\0') + codeset = "ASCII"; + +#ifdef DARWIN7 + /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" + (the default codeset) does not work when MB_CUR_MAX is 1. */ + if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX <= 1) + codeset = "ASCII"; +#endif + + return codeset; +} diff --git a/lib/localcharset.h b/lib/localcharset.h new file mode 100644 index 000000000..c20982986 --- /dev/null +++ b/lib/localcharset.h @@ -0,0 +1,40 @@ +/* Determine a canonical name for the current locale's character encoding. + Copyright (C) 2000-2003, 2009-2013 Free Software Foundation, Inc. + This file is part of the GNU CHARSET Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see . */ + +#ifndef _LOCALCHARSET_H +#define _LOCALCHARSET_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Determine the current locale's character encoding, and canonicalize it + into one of the canonical names listed in config.charset. + The result must not be freed; it is statically allocated. + If the canonical name cannot be determined, the result is a non-canonical + name. */ +extern const char * locale_charset (void); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _LOCALCHARSET_H */ diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c new file mode 100644 index 000000000..5ee44aea4 --- /dev/null +++ b/lib/mbrtowc.c @@ -0,0 +1,402 @@ +/* Convert multibyte character to wide character. + Copyright (C) 1999-2002, 2005-2013 Free Software Foundation, Inc. + Written by Bruno Haible , 2008. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include + +#if GNULIB_defined_mbstate_t +/* Implement mbrtowc() on top of mbtowc(). */ + +# include +# include + +# include "localcharset.h" +# include "streq.h" +# include "verify.h" + + +verify (sizeof (mbstate_t) >= 4); + +static char internal_state[4]; + +size_t +mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +{ + char *pstate = (char *)ps; + + if (s == NULL) + { + pwc = NULL; + s = ""; + n = 1; + } + + if (n == 0) + return (size_t)(-2); + + /* Here n > 0. */ + + if (pstate == NULL) + pstate = internal_state; + + { + size_t nstate = pstate[0]; + char buf[4]; + const char *p; + size_t m; + + switch (nstate) + { + case 0: + p = s; + m = n; + break; + case 3: + buf[2] = pstate[3]; + /*FALLTHROUGH*/ + case 2: + buf[1] = pstate[2]; + /*FALLTHROUGH*/ + case 1: + buf[0] = pstate[1]; + p = buf; + m = nstate; + buf[m++] = s[0]; + if (n >= 2 && m < 4) + { + buf[m++] = s[1]; + if (n >= 3 && m < 4) + buf[m++] = s[2]; + } + break; + default: + errno = EINVAL; + return (size_t)(-1); + } + + /* Here m > 0. */ + +# if __GLIBC__ || defined __UCLIBC__ + /* Work around bug */ + mbtowc (NULL, NULL, 0); +# endif + { + int res = mbtowc (pwc, p, m); + + if (res >= 0) + { + if (pwc != NULL && ((*pwc == 0) != (res == 0))) + abort (); + if (nstate >= (res > 0 ? res : 1)) + abort (); + res -= nstate; + pstate[0] = 0; + return res; + } + + /* mbtowc does not distinguish between invalid and incomplete multibyte + sequences. But mbrtowc needs to make this distinction. + There are two possible approaches: + - Use iconv() and its return value. + - Use built-in knowledge about the possible encodings. + Given the low quality of implementation of iconv() on the systems that + lack mbrtowc(), we use the second approach. + The possible encodings are: + - 8-bit encodings, + - EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS, + - UTF-8. + Use specialized code for each. */ + if (m >= 4 || m >= MB_CUR_MAX) + goto invalid; + /* Here MB_CUR_MAX > 1 and 0 < m < 4. */ + { + const char *encoding = locale_charset (); + + if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) + { + /* Cf. unistr/u8-mblen.c. */ + unsigned char c = (unsigned char) p[0]; + + if (c >= 0xc2) + { + if (c < 0xe0) + { + if (m == 1) + goto incomplete; + } + else if (c < 0xf0) + { + if (m == 1) + goto incomplete; + if (m == 2) + { + unsigned char c2 = (unsigned char) p[1]; + + if ((c2 ^ 0x80) < 0x40 + && (c >= 0xe1 || c2 >= 0xa0) + && (c != 0xed || c2 < 0xa0)) + goto incomplete; + } + } + else if (c <= 0xf4) + { + if (m == 1) + goto incomplete; + else /* m == 2 || m == 3 */ + { + unsigned char c2 = (unsigned char) p[1]; + + if ((c2 ^ 0x80) < 0x40 + && (c >= 0xf1 || c2 >= 0x90) + && (c < 0xf4 || (c == 0xf4 && c2 < 0x90))) + { + if (m == 2) + goto incomplete; + else /* m == 3 */ + { + unsigned char c3 = (unsigned char) p[2]; + + if ((c3 ^ 0x80) < 0x40) + goto incomplete; + } + } + } + } + } + goto invalid; + } + + /* As a reference for this code, you can use the GNU libiconv + implementation. Look for uses of the RET_TOOFEW macro. */ + + if (STREQ_OPT (encoding, + "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0)) + { + if (m == 1) + { + unsigned char c = (unsigned char) p[0]; + + if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f) + goto incomplete; + } + if (m == 2) + { + unsigned char c = (unsigned char) p[0]; + + if (c == 0x8f) + { + unsigned char c2 = (unsigned char) p[1]; + + if (c2 >= 0xa1 && c2 < 0xff) + goto incomplete; + } + } + goto invalid; + } + if (STREQ_OPT (encoding, + "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0) + || STREQ_OPT (encoding, + "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) + || STREQ_OPT (encoding, + "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0)) + { + if (m == 1) + { + unsigned char c = (unsigned char) p[0]; + + if (c >= 0xa1 && c < 0xff) + goto incomplete; + } + goto invalid; + } + if (STREQ_OPT (encoding, + "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0)) + { + if (m == 1) + { + unsigned char c = (unsigned char) p[0]; + + if ((c >= 0xa1 && c < 0xff) || c == 0x8e) + goto incomplete; + } + else /* m == 2 || m == 3 */ + { + unsigned char c = (unsigned char) p[0]; + + if (c == 0x8e) + goto incomplete; + } + goto invalid; + } + if (STREQ_OPT (encoding, + "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0)) + { + if (m == 1) + { + unsigned char c = (unsigned char) p[0]; + + if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe)) + goto incomplete; + } + else /* m == 2 || m == 3 */ + { + unsigned char c = (unsigned char) p[0]; + + if (c >= 0x90 && c <= 0xe3) + { + unsigned char c2 = (unsigned char) p[1]; + + if (c2 >= 0x30 && c2 <= 0x39) + { + if (m == 2) + goto incomplete; + else /* m == 3 */ + { + unsigned char c3 = (unsigned char) p[2]; + + if (c3 >= 0x81 && c3 <= 0xfe) + goto incomplete; + } + } + } + } + goto invalid; + } + if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0)) + { + if (m == 1) + { + unsigned char c = (unsigned char) p[0]; + + if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea) + || (c >= 0xf0 && c <= 0xf9)) + goto incomplete; + } + goto invalid; + } + + /* An unknown multibyte encoding. */ + goto incomplete; + } + + incomplete: + { + size_t k = nstate; + /* Here 0 <= k < m < 4. */ + pstate[++k] = s[0]; + if (k < m) + { + pstate[++k] = s[1]; + if (k < m) + pstate[++k] = s[2]; + } + if (k != m) + abort (); + } + pstate[0] = m; + return (size_t)(-2); + + invalid: + errno = EILSEQ; + /* The conversion state is undefined, says POSIX. */ + return (size_t)(-1); + } + } +} + +#else +/* Override the system's mbrtowc() function. */ + +# undef mbrtowc + +size_t +rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +{ +# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG + if (s == NULL) + { + pwc = NULL; + s = ""; + n = 1; + } +# endif + +# if MBRTOWC_RETVAL_BUG + { + static mbstate_t internal_state; + + /* Override mbrtowc's internal state. We cannot call mbsinit() on the + hidden internal state, but we can call it on our variable. */ + if (ps == NULL) + ps = &internal_state; + + if (!mbsinit (ps)) + { + /* Parse the rest of the multibyte character byte for byte. */ + size_t count = 0; + for (; n > 0; s++, n--) + { + wchar_t wc; + size_t ret = mbrtowc (&wc, s, 1, ps); + + if (ret == (size_t)(-1)) + return (size_t)(-1); + count++; + if (ret != (size_t)(-2)) + { + /* The multibyte character has been completed. */ + if (pwc != NULL) + *pwc = wc; + return (wc == 0 ? 0 : count); + } + } + return (size_t)(-2); + } + } +# endif + +# if MBRTOWC_NUL_RETVAL_BUG + { + wchar_t wc; + size_t ret = mbrtowc (&wc, s, n, ps); + + if (ret != (size_t)(-1) && ret != (size_t)(-2)) + { + if (pwc != NULL) + *pwc = wc; + if (wc == 0) + ret = 0; + } + return ret; + } +# else + { +# if MBRTOWC_NULL_ARG1_BUG + wchar_t dummy; + + if (pwc == NULL) + pwc = &dummy; +# endif + + return mbrtowc (pwc, s, n, ps); + } +# endif +} + +#endif diff --git a/lib/mbsinit.c b/lib/mbsinit.c new file mode 100644 index 000000000..26fbb7fa2 --- /dev/null +++ b/lib/mbsinit.c @@ -0,0 +1,61 @@ +/* Test for initial conversion state. + Copyright (C) 2008-2013 Free Software Foundation, Inc. + Written by Bruno Haible , 2008. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include + +#include "verify.h" + +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + +/* On native Windows, 'mbstate_t' is defined as 'int'. */ + +int +mbsinit (const mbstate_t *ps) +{ + return ps == NULL || *ps == 0; +} + +#else + +/* Platforms that lack mbsinit() also lack mbrlen(), mbrtowc(), mbsrtowcs() + and wcrtomb(), wcsrtombs(). + We assume that + - sizeof (mbstate_t) >= 4, + - only stateless encodings are supported (such as UTF-8 and EUC-JP, but + not ISO-2022 variants), + - for each encoding, the number of bytes for a wide character is <= 4. + (This maximum is attained for UTF-8, GB18030, EUC-TW.) + We define the meaning of mbstate_t as follows: + - In mb -> wc direction, mbstate_t's first byte contains the number of + buffered bytes (in the range 0..3), followed by up to 3 buffered bytes. + - In wc -> mb direction, mbstate_t contains no information. In other + words, it is always in the initial state. */ + +verify (sizeof (mbstate_t) >= 4); + +int +mbsinit (const mbstate_t *ps) +{ + const char *pstate = (const char *)ps; + + return pstate == NULL || pstate[0] == 0; +} + +#endif diff --git a/lib/quote.h b/lib/quote.h new file mode 100644 index 000000000..167979676 --- /dev/null +++ b/lib/quote.h @@ -0,0 +1,46 @@ +/* quote.h - prototypes for quote.c + + Copyright (C) 1998-2001, 2003, 2009-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef QUOTE_H_ +# define QUOTE_H_ 1 + +# include + +/* The quoting options used by quote_n and quote. Its type is incomplete, + so it's useful only in expressions like '"e_quoting_options'. */ +extern struct quoting_options quote_quoting_options; + +/* Return an unambiguous printable representation of ARG (of size + ARGSIZE), allocated in slot N, suitable for diagnostics. If + ARGSIZE is SIZE_MAX, use the string length of the argument for + ARGSIZE. */ +char const *quote_n_mem (int n, char const *arg, size_t argsize); + +/* Return an unambiguous printable representation of ARG (of size + ARGSIZE), suitable for diagnostics. If ARGSIZE is SIZE_MAX, use + the string length of the argument for ARGSIZE. */ +char const *quote_mem (char const *arg, size_t argsize); + +/* Return an unambiguous printable representation of ARG, allocated in + slot N, suitable for diagnostics. */ +char const *quote_n (int n, char const *arg); + +/* Return an unambiguous printable representation of ARG, suitable for + diagnostics. */ +char const *quote (char const *arg); + +#endif /* !QUOTE_H_ */ diff --git a/lib/quotearg.c b/lib/quotearg.c new file mode 100644 index 000000000..40114d7fb --- /dev/null +++ b/lib/quotearg.c @@ -0,0 +1,968 @@ +/* quotearg.c - quote arguments for output + + Copyright (C) 1998-2002, 2004-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Eggert */ + +/* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that + the quoting_options_from_style function might be candidate for + attribute 'pure' */ +#if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__ +# pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" +#endif + +#include + +#include "quotearg.h" +#include "quote.h" + +#include "xalloc.h" +#include "c-strcaseeq.h" +#include "localcharset.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#define INT_BITS (sizeof (int) * CHAR_BIT) + +struct quoting_options +{ + /* Basic quoting style. */ + enum quoting_style style; + + /* Additional flags. Bitwise combination of enum quoting_flags. */ + int flags; + + /* Quote the characters indicated by this bit vector even if the + quoting style would not normally require them to be quoted. */ + unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; + + /* The left quote for custom_quoting_style. */ + char const *left_quote; + + /* The right quote for custom_quoting_style. */ + char const *right_quote; +}; + +/* Names of quoting styles. */ +char const *const quoting_style_args[] = +{ + "literal", + "shell", + "shell-always", + "c", + "c-maybe", + "escape", + "locale", + "clocale", + 0 +}; + +/* Correspondences to quoting style names. */ +enum quoting_style const quoting_style_vals[] = +{ + literal_quoting_style, + shell_quoting_style, + shell_always_quoting_style, + c_quoting_style, + c_maybe_quoting_style, + escape_quoting_style, + locale_quoting_style, + clocale_quoting_style +}; + +/* The default quoting options. */ +static struct quoting_options default_quoting_options; + +/* Allocate a new set of quoting options, with contents initially identical + to O if O is not null, or to the default if O is null. + It is the caller's responsibility to free the result. */ +struct quoting_options * +clone_quoting_options (struct quoting_options *o) +{ + int e = errno; + struct quoting_options *p = xmemdup (o ? o : &default_quoting_options, + sizeof *o); + errno = e; + return p; +} + +/* Get the value of O's quoting style. If O is null, use the default. */ +enum quoting_style +get_quoting_style (struct quoting_options *o) +{ + return (o ? o : &default_quoting_options)->style; +} + +/* In O (or in the default if O is null), + set the value of the quoting style to S. */ +void +set_quoting_style (struct quoting_options *o, enum quoting_style s) +{ + (o ? o : &default_quoting_options)->style = s; +} + +/* In O (or in the default if O is null), + set the value of the quoting options for character C to I. + Return the old value. Currently, the only values defined for I are + 0 (the default) and 1 (which means to quote the character even if + it would not otherwise be quoted). */ +int +set_char_quoting (struct quoting_options *o, char c, int i) +{ + unsigned char uc = c; + unsigned int *p = + (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; + int shift = uc % INT_BITS; + int r = (*p >> shift) & 1; + *p ^= ((i & 1) ^ r) << shift; + return r; +} + +/* In O (or in the default if O is null), + set the value of the quoting options flag to I, which can be a + bitwise combination of enum quoting_flags, or 0 for default + behavior. Return the old value. */ +int +set_quoting_flags (struct quoting_options *o, int i) +{ + int r; + if (!o) + o = &default_quoting_options; + r = o->flags; + o->flags = i; + return r; +} + +void +set_custom_quoting (struct quoting_options *o, + char const *left_quote, char const *right_quote) +{ + if (!o) + o = &default_quoting_options; + o->style = custom_quoting_style; + if (!left_quote || !right_quote) + abort (); + o->left_quote = left_quote; + o->right_quote = right_quote; +} + +/* Return quoting options for STYLE, with no extra quoting. */ +static struct quoting_options /* NOT PURE!! */ +quoting_options_from_style (enum quoting_style style) +{ + struct quoting_options o = { 0, 0, { 0 }, NULL, NULL }; + if (style == custom_quoting_style) + abort (); + o.style = style; + return o; +} + +/* MSGID approximates a quotation mark. Return its translation if it + has one; otherwise, return either it or "\"", depending on S. + + S is either clocale_quoting_style or locale_quoting_style. */ +static char const * +gettext_quote (char const *msgid, enum quoting_style s) +{ + char const *translation = _(msgid); + char const *locale_code; + + if (translation != msgid) + return translation; + + /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019. + Here is a list of other locales that include U+2018 and U+2019: + + ISO-8859-7 0xA1 KOI8-T 0x91 + CP869 0x8B CP874 0x91 + CP932 0x81 0x65 CP936 0xA1 0xAE + CP949 0xA1 0xAE CP950 0xA1 0xA5 + CP1250 0x91 CP1251 0x91 + CP1252 0x91 CP1253 0x91 + CP1254 0x91 CP1255 0x91 + CP1256 0x91 CP1257 0x91 + EUC-JP 0xA1 0xC6 EUC-KR 0xA1 0xAE + EUC-TW 0xA1 0xE4 BIG5 0xA1 0xA5 + BIG5-HKSCS 0xA1 0xA5 EUC-CN 0xA1 0xAE + GBK 0xA1 0xAE Georgian-PS 0x91 + PT154 0x91 + + None of these is still in wide use; using iconv is overkill. */ + locale_code = locale_charset (); + if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0)) + return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99"; + if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0)) + return msgid[0] == '`' ? "\xa1\ae": "\xa1\xaf"; + + return (s == clocale_quoting_style ? "\"" : "'"); +} + +/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of + argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and + QUOTE_THESE_TOO to control quoting. + Terminate the output with a null character, and return the written + size of the output, not counting the terminating null. + If BUFFERSIZE is too small to store the output string, return the + value that would have been returned had BUFFERSIZE been large enough. + If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. + + This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, + ARGSIZE, O), except it breaks O into its component pieces and is + not careful about errno. */ + +static size_t +quotearg_buffer_restyled (char *buffer, size_t buffersize, + char const *arg, size_t argsize, + enum quoting_style quoting_style, int flags, + unsigned int const *quote_these_too, + char const *left_quote, + char const *right_quote) +{ + size_t i; + size_t len = 0; + char const *quote_string = 0; + size_t quote_string_len = 0; + bool backslash_escapes = false; + bool unibyte_locale = MB_CUR_MAX == 1; + bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0; + +#define STORE(c) \ + do \ + { \ + if (len < buffersize) \ + buffer[len] = (c); \ + len++; \ + } \ + while (0) + + switch (quoting_style) + { + case c_maybe_quoting_style: + quoting_style = c_quoting_style; + elide_outer_quotes = true; + /* Fall through. */ + case c_quoting_style: + if (!elide_outer_quotes) + STORE ('"'); + backslash_escapes = true; + quote_string = "\""; + quote_string_len = 1; + break; + + case escape_quoting_style: + backslash_escapes = true; + elide_outer_quotes = false; + break; + + case locale_quoting_style: + case clocale_quoting_style: + case custom_quoting_style: + { + if (quoting_style != custom_quoting_style) + { + /* TRANSLATORS: + Get translations for open and closing quotation marks. + The message catalog should translate "`" to a left + quotation mark suitable for the locale, and similarly for + "'". For example, a French Unicode local should translate + these to U+00AB (LEFT-POINTING DOUBLE ANGLE + QUOTATION MARK), and U+00BB (RIGHT-POINTING DOUBLE ANGLE + QUOTATION MARK), respectively. + + If the catalog has no translation, we will try to + use Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and + Unicode U+2019 (RIGHT SINGLE QUOTATION MARK). If the + current locale is not Unicode, locale_quoting_style + will quote 'like this', and clocale_quoting_style will + quote "like this". You should always include translations + for "`" and "'" even if U+2018 and U+2019 are appropriate + for your locale. + + If you don't know what to put here, please see + + and use glyphs suitable for your language. */ + left_quote = gettext_quote (N_("`"), quoting_style); + right_quote = gettext_quote (N_("'"), quoting_style); + } + if (!elide_outer_quotes) + for (quote_string = left_quote; *quote_string; quote_string++) + STORE (*quote_string); + backslash_escapes = true; + quote_string = right_quote; + quote_string_len = strlen (quote_string); + } + break; + + case shell_quoting_style: + quoting_style = shell_always_quoting_style; + elide_outer_quotes = true; + /* Fall through. */ + case shell_always_quoting_style: + if (!elide_outer_quotes) + STORE ('\''); + quote_string = "'"; + quote_string_len = 1; + break; + + case literal_quoting_style: + elide_outer_quotes = false; + break; + + default: + abort (); + } + + for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) + { + unsigned char c; + unsigned char esc; + bool is_right_quote = false; + + if (backslash_escapes + && quote_string_len + && (i + quote_string_len + <= (argsize == SIZE_MAX && 1 < quote_string_len + /* Use strlen only if we must: when argsize is SIZE_MAX, + and when the quote string is more than 1 byte long. + If we do call strlen, save the result. */ + ? (argsize = strlen (arg)) : argsize)) + && memcmp (arg + i, quote_string, quote_string_len) == 0) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + is_right_quote = true; + } + + c = arg[i]; + switch (c) + { + case '\0': + if (backslash_escapes) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + STORE ('\\'); + /* If quote_string were to begin with digits, we'd need to + test for the end of the arg as well. However, it's + hard to imagine any locale that would use digits in + quotes, and set_custom_quoting is documented not to + accept them. */ + if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9') + { + STORE ('0'); + STORE ('0'); + } + c = '0'; + /* We don't have to worry that this last '0' will be + backslash-escaped because, again, quote_string should + not start with it and because quote_these_too is + documented as not accepting it. */ + } + else if (flags & QA_ELIDE_NULL_BYTES) + continue; + break; + + case '?': + switch (quoting_style) + { + case shell_always_quoting_style: + if (elide_outer_quotes) + goto force_outer_quoting_style; + break; + + case c_quoting_style: + if ((flags & QA_SPLIT_TRIGRAPHS) + && i + 2 < argsize && arg[i + 1] == '?') + switch (arg[i + 2]) + { + case '!': case '\'': + case '(': case ')': case '-': case '/': + case '<': case '=': case '>': + /* Escape the second '?' in what would otherwise be + a trigraph. */ + if (elide_outer_quotes) + goto force_outer_quoting_style; + c = arg[i + 2]; + i += 2; + STORE ('?'); + STORE ('"'); + STORE ('"'); + STORE ('?'); + break; + + default: + break; + } + break; + + default: + break; + } + break; + + case '\a': esc = 'a'; goto c_escape; + case '\b': esc = 'b'; goto c_escape; + case '\f': esc = 'f'; goto c_escape; + case '\n': esc = 'n'; goto c_and_shell_escape; + case '\r': esc = 'r'; goto c_and_shell_escape; + case '\t': esc = 't'; goto c_and_shell_escape; + case '\v': esc = 'v'; goto c_escape; + case '\\': esc = c; + /* No need to escape the escape if we are trying to elide + outer quotes and nothing else is problematic. */ + if (backslash_escapes && elide_outer_quotes && quote_string_len) + goto store_c; + + c_and_shell_escape: + if (quoting_style == shell_always_quoting_style + && elide_outer_quotes) + goto force_outer_quoting_style; + /* Fall through. */ + c_escape: + if (backslash_escapes) + { + c = esc; + goto store_escape; + } + break; + + case '{': case '}': /* sometimes special if isolated */ + if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) + break; + /* Fall through. */ + case '#': case '~': + if (i != 0) + break; + /* Fall through. */ + case ' ': + case '!': /* special in bash */ + case '"': case '$': case '&': + case '(': case ')': case '*': case ';': + case '<': + case '=': /* sometimes special in 0th or (with "set -k") later args */ + case '>': case '[': + case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ + case '`': case '|': + /* A shell special character. In theory, '$' and '`' could + be the first bytes of multibyte characters, which means + we should check them with mbrtowc, but in practice this + doesn't happen so it's not worth worrying about. */ + if (quoting_style == shell_always_quoting_style + && elide_outer_quotes) + goto force_outer_quoting_style; + break; + + case '\'': + if (quoting_style == shell_always_quoting_style) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + STORE ('\''); + STORE ('\\'); + STORE ('\''); + } + break; + + case '%': case '+': case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': case ':': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': + case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': + case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + /* These characters don't cause problems, no matter what the + quoting style is. They cannot start multibyte sequences. + A digit or a special letter would cause trouble if it + appeared at the beginning of quote_string because we'd then + escape by prepending a backslash. However, it's hard to + imagine any locale that would use digits or letters as + quotes, and set_custom_quoting is documented not to accept + them. Also, a digit or a special letter would cause + trouble if it appeared in quote_these_too, but that's also + documented as not accepting them. */ + break; + + default: + /* If we have a multibyte sequence, copy it until we reach + its end, find an error, or come back to the initial shift + state. For C-like styles, if the sequence has + unprintable characters, escape the whole sequence, since + we can't easily escape single characters within it. */ + { + /* Length of multibyte sequence found so far. */ + size_t m; + + bool printable; + + if (unibyte_locale) + { + m = 1; + printable = isprint (c) != 0; + } + else + { + mbstate_t mbstate; + memset (&mbstate, 0, sizeof mbstate); + + m = 0; + printable = true; + if (argsize == SIZE_MAX) + argsize = strlen (arg); + + do + { + wchar_t w; + size_t bytes = mbrtowc (&w, &arg[i + m], + argsize - (i + m), &mbstate); + if (bytes == 0) + break; + else if (bytes == (size_t) -1) + { + printable = false; + break; + } + else if (bytes == (size_t) -2) + { + printable = false; + while (i + m < argsize && arg[i + m]) + m++; + break; + } + else + { + /* Work around a bug with older shells that "see" a '\' + that is really the 2nd byte of a multibyte character. + In practice the problem is limited to ASCII + chars >= '@' that are shell special chars. */ + if ('[' == 0x5b && elide_outer_quotes + && quoting_style == shell_always_quoting_style) + { + size_t j; + for (j = 1; j < bytes; j++) + switch (arg[i + m + j]) + { + case '[': case '\\': case '^': + case '`': case '|': + goto force_outer_quoting_style; + + default: + break; + } + } + + if (! iswprint (w)) + printable = false; + m += bytes; + } + } + while (! mbsinit (&mbstate)); + } + + if (1 < m || (backslash_escapes && ! printable)) + { + /* Output a multibyte sequence, or an escaped + unprintable unibyte character. */ + size_t ilim = i + m; + + for (;;) + { + if (backslash_escapes && ! printable) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + STORE ('\\'); + STORE ('0' + (c >> 6)); + STORE ('0' + ((c >> 3) & 7)); + c = '0' + (c & 7); + } + else if (is_right_quote) + { + STORE ('\\'); + is_right_quote = false; + } + if (ilim <= i + 1) + break; + STORE (c); + c = arg[++i]; + } + + goto store_c; + } + } + } + + if (! ((backslash_escapes || elide_outer_quotes) + && quote_these_too + && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))) + && !is_right_quote) + goto store_c; + + store_escape: + if (elide_outer_quotes) + goto force_outer_quoting_style; + STORE ('\\'); + + store_c: + STORE (c); + } + + if (len == 0 && quoting_style == shell_always_quoting_style + && elide_outer_quotes) + goto force_outer_quoting_style; + + if (quote_string && !elide_outer_quotes) + for (; *quote_string; quote_string++) + STORE (*quote_string); + + if (len < buffersize) + buffer[len] = '\0'; + return len; + + force_outer_quoting_style: + /* Don't reuse quote_these_too, since the addition of outer quotes + sufficiently quotes the specified characters. */ + return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, + quoting_style, + flags & ~QA_ELIDE_OUTER_QUOTES, NULL, + left_quote, right_quote); +} + +/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of + argument ARG (of size ARGSIZE), using O to control quoting. + If O is null, use the default. + Terminate the output with a null character, and return the written + size of the output, not counting the terminating null. + If BUFFERSIZE is too small to store the output string, return the + value that would have been returned had BUFFERSIZE been large enough. + If ARGSIZE is SIZE_MAX, use the string length of the argument for + ARGSIZE. */ +size_t +quotearg_buffer (char *buffer, size_t buffersize, + char const *arg, size_t argsize, + struct quoting_options const *o) +{ + struct quoting_options const *p = o ? o : &default_quoting_options; + int e = errno; + size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, + p->style, p->flags, p->quote_these_too, + p->left_quote, p->right_quote); + errno = e; + return r; +} + +/* Equivalent to quotearg_alloc (ARG, ARGSIZE, NULL, O). */ +char * +quotearg_alloc (char const *arg, size_t argsize, + struct quoting_options const *o) +{ + return quotearg_alloc_mem (arg, argsize, NULL, o); +} + +/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly + allocated storage containing the quoted string, and store the + resulting size into *SIZE, if non-NULL. The result can contain + embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not + NULL, and set_quoting_flags has not set the null byte elision + flag. */ +char * +quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size, + struct quoting_options const *o) +{ + struct quoting_options const *p = o ? o : &default_quoting_options; + int e = errno; + /* Elide embedded null bytes if we can't return a size. */ + int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES); + size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style, + flags, p->quote_these_too, + p->left_quote, + p->right_quote) + 1; + char *buf = xcharalloc (bufsize); + quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags, + p->quote_these_too, + p->left_quote, p->right_quote); + errno = e; + if (size) + *size = bufsize - 1; + return buf; +} + +/* A storage slot with size and pointer to a value. */ +struct slotvec +{ + size_t size; + char *val; +}; + +/* Preallocate a slot 0 buffer, so that the caller can always quote + one small component of a "memory exhausted" message in slot 0. */ +static char slot0[256]; +static unsigned int nslots = 1; +static struct slotvec slotvec0 = {sizeof slot0, slot0}; +static struct slotvec *slotvec = &slotvec0; + +void +quotearg_free (void) +{ + struct slotvec *sv = slotvec; + unsigned int i; + for (i = 1; i < nslots; i++) + free (sv[i].val); + if (sv[0].val != slot0) + { + free (sv[0].val); + slotvec0.size = sizeof slot0; + slotvec0.val = slot0; + } + if (sv != &slotvec0) + { + free (sv); + slotvec = &slotvec0; + } + nslots = 1; +} + +/* Use storage slot N to return a quoted version of argument ARG. + ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a + null-terminated string. + OPTIONS specifies the quoting options. + The returned value points to static storage that can be + reused by the next call to this function with the same value of N. + N must be nonnegative. N is deliberately declared with type "int" + to allow for future extensions (using negative values). */ +static char * +quotearg_n_options (int n, char const *arg, size_t argsize, + struct quoting_options const *options) +{ + int e = errno; + + unsigned int n0 = n; + struct slotvec *sv = slotvec; + + if (n < 0) + abort (); + + if (nslots <= n0) + { + /* FIXME: technically, the type of n1 should be 'unsigned int', + but that evokes an unsuppressible warning from gcc-4.0.1 and + older. If gcc ever provides an option to suppress that warning, + revert to the original type, so that the test in xalloc_oversized + is once again performed only at compile time. */ + size_t n1 = n0 + 1; + bool preallocated = (sv == &slotvec0); + + if (xalloc_oversized (n1, sizeof *sv)) + xalloc_die (); + + slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv); + if (preallocated) + *sv = slotvec0; + memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv); + nslots = n1; + } + + { + size_t size = sv[n].size; + char *val = sv[n].val; + /* Elide embedded null bytes since we don't return a size. */ + int flags = options->flags | QA_ELIDE_NULL_BYTES; + size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize, + options->style, flags, + options->quote_these_too, + options->left_quote, + options->right_quote); + + if (size <= qsize) + { + sv[n].size = size = qsize + 1; + if (val != slot0) + free (val); + sv[n].val = val = xcharalloc (size); + quotearg_buffer_restyled (val, size, arg, argsize, options->style, + flags, options->quote_these_too, + options->left_quote, + options->right_quote); + } + + errno = e; + return val; + } +} + +char * +quotearg_n (int n, char const *arg) +{ + return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); +} + +char * +quotearg_n_mem (int n, char const *arg, size_t argsize) +{ + return quotearg_n_options (n, arg, argsize, &default_quoting_options); +} + +char * +quotearg (char const *arg) +{ + return quotearg_n (0, arg); +} + +char * +quotearg_mem (char const *arg, size_t argsize) +{ + return quotearg_n_mem (0, arg, argsize); +} + +char * +quotearg_n_style (int n, enum quoting_style s, char const *arg) +{ + struct quoting_options const o = quoting_options_from_style (s); + return quotearg_n_options (n, arg, SIZE_MAX, &o); +} + +char * +quotearg_n_style_mem (int n, enum quoting_style s, + char const *arg, size_t argsize) +{ + struct quoting_options const o = quoting_options_from_style (s); + return quotearg_n_options (n, arg, argsize, &o); +} + +char * +quotearg_style (enum quoting_style s, char const *arg) +{ + return quotearg_n_style (0, s, arg); +} + +char * +quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize) +{ + return quotearg_n_style_mem (0, s, arg, argsize); +} + +char * +quotearg_char_mem (char const *arg, size_t argsize, char ch) +{ + struct quoting_options options; + options = default_quoting_options; + set_char_quoting (&options, ch, 1); + return quotearg_n_options (0, arg, argsize, &options); +} + +char * +quotearg_char (char const *arg, char ch) +{ + return quotearg_char_mem (arg, SIZE_MAX, ch); +} + +char * +quotearg_colon (char const *arg) +{ + return quotearg_char (arg, ':'); +} + +char * +quotearg_colon_mem (char const *arg, size_t argsize) +{ + return quotearg_char_mem (arg, argsize, ':'); +} + +char * +quotearg_n_custom (int n, char const *left_quote, + char const *right_quote, char const *arg) +{ + return quotearg_n_custom_mem (n, left_quote, right_quote, arg, + SIZE_MAX); +} + +char * +quotearg_n_custom_mem (int n, char const *left_quote, + char const *right_quote, + char const *arg, size_t argsize) +{ + struct quoting_options o = default_quoting_options; + set_custom_quoting (&o, left_quote, right_quote); + return quotearg_n_options (n, arg, argsize, &o); +} + +char * +quotearg_custom (char const *left_quote, char const *right_quote, + char const *arg) +{ + return quotearg_n_custom (0, left_quote, right_quote, arg); +} + +char * +quotearg_custom_mem (char const *left_quote, char const *right_quote, + char const *arg, size_t argsize) +{ + return quotearg_n_custom_mem (0, left_quote, right_quote, arg, + argsize); +} + + +/* The quoting option used by the functions of quote.h. */ +struct quoting_options quote_quoting_options = + { + locale_quoting_style, + 0, + { 0 }, + NULL, NULL + }; + +char const * +quote_n_mem (int n, char const *arg, size_t argsize) +{ + return quotearg_n_options (n, arg, argsize, "e_quoting_options); +} + +char const * +quote_mem (char const *arg, size_t argsize) +{ + return quote_n_mem (0, arg, argsize); +} + +char const * +quote_n (int n, char const *arg) +{ + return quote_n_mem (n, arg, SIZE_MAX); +} + +char const * +quote (char const *arg) +{ + return quote_n (0, arg); +} diff --git a/lib/quotearg.h b/lib/quotearg.h new file mode 100644 index 000000000..58ee3f608 --- /dev/null +++ b/lib/quotearg.h @@ -0,0 +1,391 @@ +/* quotearg.h - quote arguments for output + + Copyright (C) 1998-2002, 2004, 2006, 2008-2013 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Eggert */ + +#ifndef QUOTEARG_H_ +# define QUOTEARG_H_ 1 + +# include + +/* Basic quoting styles. For each style, an example is given on the + input strings "simple", "\0 \t\n'\"\033?""?/\\", and "a:b", using + quotearg_buffer, quotearg_mem, and quotearg_colon_mem with that + style and the default flags and quoted characters. Note that the + examples are shown here as valid C strings rather than what + displays on a terminal (with "??/" as a trigraph for "\\"). */ +enum quoting_style + { + /* Output names as-is (ls --quoting-style=literal). Can result in + embedded null bytes if QA_ELIDE_NULL_BYTES is not in + effect. + + quotearg_buffer: + "simple", "\0 \t\n'\"\033??/\\", "a:b" + quotearg: + "simple", " \t\n'\"\033??/\\", "a:b" + quotearg_colon: + "simple", " \t\n'\"\033??/\\", "a:b" + */ + literal_quoting_style, + + /* Quote names for the shell if they contain shell metacharacters + or would cause ambiguous output (ls --quoting-style=shell). + Can result in embedded null bytes if QA_ELIDE_NULL_BYTES is not + in effect. + + quotearg_buffer: + "simple", "'\0 \t\n'\\''\"\033??/\\'", "a:b" + quotearg: + "simple", "' \t\n'\\''\"\033??/\\'", "a:b" + quotearg_colon: + "simple", "' \t\n'\\''\"\033??/\\'", "'a:b'" + */ + shell_quoting_style, + + /* Quote names for the shell, even if they would normally not + require quoting (ls --quoting-style=shell-always). Can result + in embedded null bytes if QA_ELIDE_NULL_BYTES is not in effect. + Behaves like shell_quoting_style if QA_ELIDE_OUTER_QUOTES is in + effect. + + quotearg_buffer: + "'simple'", "'\0 \t\n'\\''\"\033??/\\'", "'a:b'" + quotearg: + "'simple'", "' \t\n'\\''\"\033??/\\'", "'a:b'" + quotearg_colon: + "'simple'", "' \t\n'\\''\"\033??/\\'", "'a:b'" + */ + shell_always_quoting_style, + + /* Quote names as for a C language string (ls --quoting-style=c). + Behaves like c_maybe_quoting_style if QA_ELIDE_OUTER_QUOTES is + in effect. Split into consecutive strings if + QA_SPLIT_TRIGRAPHS. + + quotearg_buffer: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a:b\"" + quotearg: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a:b\"" + quotearg_colon: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a\\:b\"" + */ + c_quoting_style, + + /* Like c_quoting_style except omit the surrounding double-quote + characters if no quoted characters are encountered. + + quotearg_buffer: + "simple", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "a:b" + quotearg: + "simple", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "a:b" + quotearg_colon: + "simple", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a:b\"" + */ + c_maybe_quoting_style, + + /* Like c_quoting_style except always omit the surrounding + double-quote characters and ignore QA_SPLIT_TRIGRAPHS + (ls --quoting-style=escape). + + quotearg_buffer: + "simple", "\\0 \\t\\n'\"\\033??/\\\\", "a:b" + quotearg: + "simple", "\\0 \\t\\n'\"\\033??/\\\\", "a:b" + quotearg_colon: + "simple", "\\0 \\t\\n'\"\\033??/\\\\", "a\\:b" + */ + escape_quoting_style, + + /* Like clocale_quoting_style, but use single quotes in the + default C locale or if the program does not use gettext + (ls --quoting-style=locale). For UTF-8 locales, quote + characters will use Unicode. + + LC_MESSAGES=C + quotearg_buffer: + "`simple'", "`\\0 \\t\\n\\'\"\\033??/\\\\'", "`a:b'" + quotearg: + "`simple'", "`\\0 \\t\\n\\'\"\\033??/\\\\'", "`a:b'" + quotearg_colon: + "`simple'", "`\\0 \\t\\n\\'\"\\033??/\\\\'", "`a\\:b'" + + LC_MESSAGES=pt_PT.utf8 + quotearg_buffer: + "\302\253simple\302\273", + "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a:b\302\273" + quotearg: + "\302\253simple\302\273", + "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a:b\302\273" + quotearg_colon: + "\302\253simple\302\273", + "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a\\:b\302\273" + */ + locale_quoting_style, + + /* Like c_quoting_style except use quotation marks appropriate for + the locale and ignore QA_SPLIT_TRIGRAPHS + (ls --quoting-style=clocale). + + LC_MESSAGES=C + quotearg_buffer: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a:b\"" + quotearg: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a:b\"" + quotearg_colon: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a\\:b\"" + + LC_MESSAGES=pt_PT.utf8 + quotearg_buffer: + "\302\253simple\302\273", + "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a:b\302\273" + quotearg: + "\302\253simple\302\273", + "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a:b\302\273" + quotearg_colon: + "\302\253simple\302\273", + "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a\\:b\302\273" + */ + clocale_quoting_style, + + /* Like clocale_quoting_style except use the custom quotation marks + set by set_custom_quoting. If custom quotation marks are not + set, the behavior is undefined. + + left_quote = right_quote = "'" + quotearg_buffer: + "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a:b'" + quotearg: + "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a:b'" + quotearg_colon: + "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a\\:b'" + + left_quote = "(" and right_quote = ")" + quotearg_buffer: + "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a:b)" + quotearg: + "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a:b)" + quotearg_colon: + "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a\\:b)" + + left_quote = ":" and right_quote = " " + quotearg_buffer: + ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a:b " + quotearg: + ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a:b " + quotearg_colon: + ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a\\:b " + + left_quote = "\"'" and right_quote = "'\"" + Notice that this is treated as a single level of quotes or two + levels where the outer quote need not be escaped within the inner + quotes. For two levels where the outer quote must be escaped + within the inner quotes, you must use separate quotearg + invocations. + quotearg_buffer: + "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a:b'\"" + quotearg: + "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a:b'\"" + quotearg_colon: + "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a\\:b'\"" + */ + custom_quoting_style + }; + +/* Flags for use in set_quoting_flags. */ +enum quoting_flags + { + /* Always elide null bytes from styles that do not quote them, + even when the length of the result is available to the + caller. */ + QA_ELIDE_NULL_BYTES = 0x01, + + /* Omit the surrounding quote characters if no escaped characters + are encountered. Note that if no other character needs + escaping, then neither does the escape character. */ + QA_ELIDE_OUTER_QUOTES = 0x02, + + /* In the c_quoting_style and c_maybe_quoting_style, split ANSI + trigraph sequences into concatenated strings (for example, + "?""?/" rather than "??/", which could be confused with + "\\"). */ + QA_SPLIT_TRIGRAPHS = 0x04 + }; + +/* For now, --quoting-style=literal is the default, but this may change. */ +# ifndef DEFAULT_QUOTING_STYLE +# define DEFAULT_QUOTING_STYLE literal_quoting_style +# endif + +/* Names of quoting styles and their corresponding values. */ +extern char const *const quoting_style_args[]; +extern enum quoting_style const quoting_style_vals[]; + +struct quoting_options; + +/* The functions listed below set and use a hidden variable + that contains the default quoting style options. */ + +/* Allocate a new set of quoting options, with contents initially identical + to O if O is not null, or to the default if O is null. + It is the caller's responsibility to free the result. */ +struct quoting_options *clone_quoting_options (struct quoting_options *o); + +/* Get the value of O's quoting style. If O is null, use the default. */ +enum quoting_style get_quoting_style (struct quoting_options *o); + +/* In O (or in the default if O is null), + set the value of the quoting style to S. */ +void set_quoting_style (struct quoting_options *o, enum quoting_style s); + +/* In O (or in the default if O is null), + set the value of the quoting options for character C to I. + Return the old value. Currently, the only values defined for I are + 0 (the default) and 1 (which means to quote the character even if + it would not otherwise be quoted). C must never be a digit or a + letter that has special meaning after a backslash (for example, "\t" + for tab). */ +int set_char_quoting (struct quoting_options *o, char c, int i); + +/* In O (or in the default if O is null), + set the value of the quoting options flag to I, which can be a + bitwise combination of enum quoting_flags, or 0 for default + behavior. Return the old value. */ +int set_quoting_flags (struct quoting_options *o, int i); + +/* In O (or in the default if O is null), + set the value of the quoting style to custom_quoting_style, + set the left quote to LEFT_QUOTE, and set the right quote to + RIGHT_QUOTE. Each of LEFT_QUOTE and RIGHT_QUOTE must be + null-terminated and can be the empty string. Because backslashes are + used for escaping, it does not make sense for RIGHT_QUOTE to contain + a backslash. RIGHT_QUOTE must not begin with a digit or a letter + that has special meaning after a backslash (for example, "\t" for + tab). */ +void set_custom_quoting (struct quoting_options *o, + char const *left_quote, + char const *right_quote); + +/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of + argument ARG (of size ARGSIZE), using O to control quoting. + If O is null, use the default. + Terminate the output with a null character, and return the written + size of the output, not counting the terminating null. + If BUFFERSIZE is too small to store the output string, return the + value that would have been returned had BUFFERSIZE been large enough. + If ARGSIZE is -1, use the string length of the argument for ARGSIZE. + On output, BUFFER might contain embedded null bytes if ARGSIZE was + not -1, the style of O does not use backslash escapes, and the + flags of O do not request elision of null bytes.*/ +size_t quotearg_buffer (char *buffer, size_t buffersize, + char const *arg, size_t argsize, + struct quoting_options const *o); + +/* Like quotearg_buffer, except return the result in a newly allocated + buffer. It is the caller's responsibility to free the result. The + result will not contain embedded null bytes. */ +char *quotearg_alloc (char const *arg, size_t argsize, + struct quoting_options const *o); + +/* Like quotearg_alloc, except that the length of the result, + excluding the terminating null byte, is stored into SIZE if it is + non-NULL. The result might contain embedded null bytes if ARGSIZE + was not -1, SIZE was not NULL, the style of O does not use + backslash escapes, and the flags of O do not request elision of + null bytes.*/ +char *quotearg_alloc_mem (char const *arg, size_t argsize, + size_t *size, struct quoting_options const *o); + +/* Use storage slot N to return a quoted version of the string ARG. + Use the default quoting options. + The returned value points to static storage that can be + reused by the next call to this function with the same value of N. + N must be nonnegative. The output of all functions in the + quotearg_n family are guaranteed to not contain embedded null + bytes.*/ +char *quotearg_n (int n, char const *arg); + +/* Equivalent to quotearg_n (0, ARG). */ +char *quotearg (char const *arg); + +/* Use storage slot N to return a quoted version of the argument ARG + of size ARGSIZE. This is like quotearg_n (N, ARG), except it can + quote null bytes. */ +char *quotearg_n_mem (int n, char const *arg, size_t argsize); + +/* Equivalent to quotearg_n_mem (0, ARG, ARGSIZE). */ +char *quotearg_mem (char const *arg, size_t argsize); + +/* Use style S and storage slot N to return a quoted version of the string ARG. + This is like quotearg_n (N, ARG), except that it uses S with no other + options to specify the quoting method. */ +char *quotearg_n_style (int n, enum quoting_style s, char const *arg); + +/* Use style S and storage slot N to return a quoted version of the + argument ARG of size ARGSIZE. This is like quotearg_n_style + (N, S, ARG), except it can quote null bytes. */ +char *quotearg_n_style_mem (int n, enum quoting_style s, + char const *arg, size_t argsize); + +/* Equivalent to quotearg_n_style (0, S, ARG). */ +char *quotearg_style (enum quoting_style s, char const *arg); + +/* Equivalent to quotearg_n_style_mem (0, S, ARG, ARGSIZE). */ +char *quotearg_style_mem (enum quoting_style s, + char const *arg, size_t argsize); + +/* Like quotearg (ARG), except also quote any instances of CH. + See set_char_quoting for a description of acceptable CH values. */ +char *quotearg_char (char const *arg, char ch); + +/* Like quotearg_char (ARG, CH), except it can quote null bytes. */ +char *quotearg_char_mem (char const *arg, size_t argsize, char ch); + +/* Equivalent to quotearg_char (ARG, ':'). */ +char *quotearg_colon (char const *arg); + +/* Like quotearg_colon (ARG), except it can quote null bytes. */ +char *quotearg_colon_mem (char const *arg, size_t argsize); + +/* Like quotearg_n_style (N, S, ARG) but with S as custom_quoting_style + with left quote as LEFT_QUOTE and right quote as RIGHT_QUOTE. See + set_custom_quoting for a description of acceptable LEFT_QUOTE and + RIGHT_QUOTE values. */ +char *quotearg_n_custom (int n, char const *left_quote, + char const *right_quote, char const *arg); + +/* Like quotearg_n_custom (N, LEFT_QUOTE, RIGHT_QUOTE, ARG) except it + can quote null bytes. */ +char *quotearg_n_custom_mem (int n, char const *left_quote, + char const *right_quote, + char const *arg, size_t argsize); + +/* Equivalent to quotearg_n_custom (0, LEFT_QUOTE, RIGHT_QUOTE, ARG). */ +char *quotearg_custom (char const *left_quote, char const *right_quote, + char const *arg); + +/* Equivalent to quotearg_n_custom_mem (0, LEFT_QUOTE, RIGHT_QUOTE, ARG, + ARGSIZE). */ +char *quotearg_custom_mem (char const *left_quote, + char const *right_quote, + char const *arg, size_t argsize); + +/* Free any dynamically allocated memory. */ +void quotearg_free (void); + +#endif /* !QUOTEARG_H_ */ diff --git a/lib/ref-add.sin b/lib/ref-add.sin new file mode 100644 index 000000000..112bcdc64 --- /dev/null +++ b/lib/ref-add.sin @@ -0,0 +1,29 @@ +# Add this package to a list of references stored in a text file. +# +# Copyright (C) 2000, 2009-2013 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, see . +# +# Written by Bruno Haible . +# +/^# Packages using this file: / { + s/# Packages using this file:// + ta + :a + s/ @PACKAGE@ / @PACKAGE@ / + tb + s/ $/ @PACKAGE@ / + :b + s/^/# Packages using this file:/ +} diff --git a/lib/ref-del.sin b/lib/ref-del.sin new file mode 100644 index 000000000..6f7386847 --- /dev/null +++ b/lib/ref-del.sin @@ -0,0 +1,24 @@ +# Remove this package from a list of references stored in a text file. +# +# Copyright (C) 2000, 2009-2013 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, see . +# +# Written by Bruno Haible . +# +/^# Packages using this file: / { + s/# Packages using this file:// + s/ @PACKAGE@ / / + s/^/# Packages using this file:/ +} diff --git a/lib/streq.h b/lib/streq.h new file mode 100644 index 000000000..12c1867c8 --- /dev/null +++ b/lib/streq.h @@ -0,0 +1,176 @@ +/* Optimized string comparison. + Copyright (C) 2001-2002, 2007, 2009-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible . */ + +#ifndef _GL_STREQ_H +#define _GL_STREQ_H + +#include + +/* STREQ_OPT allows to optimize string comparison with a small literal string. + STREQ_OPT (s, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0) + is semantically equivalent to + strcmp (s, "EUC-KR") == 0 + just faster. */ + +/* Help GCC to generate good code for string comparisons with + immediate strings. */ +#if defined (__GNUC__) && defined (__OPTIMIZE__) + +static inline int +streq9 (const char *s1, const char *s2) +{ + return strcmp (s1 + 9, s2 + 9) == 0; +} + +static inline int +streq8 (const char *s1, const char *s2, char s28) +{ + if (s1[8] == s28) + { + if (s28 == 0) + return 1; + else + return streq9 (s1, s2); + } + else + return 0; +} + +static inline int +streq7 (const char *s1, const char *s2, char s27, char s28) +{ + if (s1[7] == s27) + { + if (s27 == 0) + return 1; + else + return streq8 (s1, s2, s28); + } + else + return 0; +} + +static inline int +streq6 (const char *s1, const char *s2, char s26, char s27, char s28) +{ + if (s1[6] == s26) + { + if (s26 == 0) + return 1; + else + return streq7 (s1, s2, s27, s28); + } + else + return 0; +} + +static inline int +streq5 (const char *s1, const char *s2, char s25, char s26, char s27, char s28) +{ + if (s1[5] == s25) + { + if (s25 == 0) + return 1; + else + return streq6 (s1, s2, s26, s27, s28); + } + else + return 0; +} + +static inline int +streq4 (const char *s1, const char *s2, char s24, char s25, char s26, char s27, char s28) +{ + if (s1[4] == s24) + { + if (s24 == 0) + return 1; + else + return streq5 (s1, s2, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +streq3 (const char *s1, const char *s2, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (s1[3] == s23) + { + if (s23 == 0) + return 1; + else + return streq4 (s1, s2, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +streq2 (const char *s1, const char *s2, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (s1[2] == s22) + { + if (s22 == 0) + return 1; + else + return streq3 (s1, s2, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +streq1 (const char *s1, const char *s2, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (s1[1] == s21) + { + if (s21 == 0) + return 1; + else + return streq2 (s1, s2, s22, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +static inline int +streq0 (const char *s1, const char *s2, char s20, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28) +{ + if (s1[0] == s20) + { + if (s20 == 0) + return 1; + else + return streq1 (s1, s2, s21, s22, s23, s24, s25, s26, s27, s28); + } + else + return 0; +} + +#define STREQ_OPT(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \ + streq0 (s1, s2, s20, s21, s22, s23, s24, s25, s26, s27, s28) + +#else + +#define STREQ_OPT(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \ + (strcmp (s1, s2) == 0) + +#endif + +#endif /* _GL_STREQ_H */ diff --git a/lib/wctype-h.c b/lib/wctype-h.c new file mode 100644 index 000000000..bb5f847e3 --- /dev/null +++ b/lib/wctype-h.c @@ -0,0 +1,4 @@ +/* Normally this would be wctype.c, but that name's already taken. */ +#include +#define _GL_WCTYPE_INLINE _GL_EXTERN_INLINE +#include "wctype.h" diff --git a/lib/wctype.in.h b/lib/wctype.in.h new file mode 100644 index 000000000..a7c07d1e8 --- /dev/null +++ b/lib/wctype.in.h @@ -0,0 +1,511 @@ +/* A substitute for ISO C99 , for platforms that lack it. + + Copyright (C) 2006-2013 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +/* Written by Bruno Haible and Paul Eggert. */ + +/* + * ISO C 99 for platforms that lack it. + * + * + * iswctype, towctrans, towlower, towupper, wctrans, wctype, + * wctrans_t, and wctype_t are not yet implemented. + */ + +#ifndef _@GUARD_PREFIX@_WCTYPE_H + +#if __GNUC__ >= 3 +@PRAGMA_SYSTEM_HEADER@ +#endif +@PRAGMA_COLUMNS@ + +#if @HAVE_WINT_T@ +/* Solaris 2.5 has a bug: must be included before . + Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +# include +# include +# include +# include +#endif + +/* mingw has declarations of towupper and towlower in as + well . Include in advance to avoid rpl_ prefix + being added to the declarations. */ +#ifdef __MINGW32__ +# include +#endif + +/* Include the original if it exists. + BeOS 5 has the functions but no . */ +/* The include_next requires a split double-inclusion guard. */ +#if @HAVE_WCTYPE_H@ +# @INCLUDE_NEXT@ @NEXT_WCTYPE_H@ +#endif + +#ifndef _@GUARD_PREFIX@_WCTYPE_H +#define _@GUARD_PREFIX@_WCTYPE_H + +_GL_INLINE_HEADER_BEGIN +#ifndef _GL_WCTYPE_INLINE +# define _GL_WCTYPE_INLINE _GL_INLINE +#endif + +/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */ + +/* The definition of _GL_WARN_ON_USE is copied here. */ + +/* Solaris 2.6 includes which includes which + #defines a number of identifiers in the application namespace. Revert + these #defines. */ +#ifdef __sun +# undef multibyte +# undef eucw1 +# undef eucw2 +# undef eucw3 +# undef scrw1 +# undef scrw2 +# undef scrw3 +#endif + +/* Define wint_t and WEOF. (Also done in wchar.in.h.) */ +#if !@HAVE_WINT_T@ && !defined wint_t +# define wint_t int +# ifndef WEOF +# define WEOF -1 +# endif +#else +/* MSVC defines wint_t as 'unsigned short' in . + This is too small: ISO C 99 section 7.24.1.(2) says that wint_t must be + "unchanged by default argument promotions". Override it. */ +# if defined _MSC_VER +# if !GNULIB_defined_wint_t +# include +typedef unsigned int rpl_wint_t; +# undef wint_t +# define wint_t rpl_wint_t +# define GNULIB_defined_wint_t 1 +# endif +# endif +# ifndef WEOF +# define WEOF ((wint_t) -1) +# endif +#endif + + +#if !GNULIB_defined_wctype_functions + +/* FreeBSD 4.4 to 4.11 has but lacks the functions. + Linux libc5 has and the functions but they are broken. + Assume all 11 functions (all isw* except iswblank) are implemented the + same way, or not at all. */ +# if ! @HAVE_ISWCNTRL@ || @REPLACE_ISWCNTRL@ + +/* IRIX 5.3 has macros but no functions, its isw* macros refer to an + undefined variable _ctmp_ and to macros like _P, and they + refer to system functions like _iswctype that are not in the + standard C library. Rather than try to get ancient buggy + implementations like this to work, just disable them. */ +# undef iswalnum +# undef iswalpha +# undef iswblank +# undef iswcntrl +# undef iswdigit +# undef iswgraph +# undef iswlower +# undef iswprint +# undef iswpunct +# undef iswspace +# undef iswupper +# undef iswxdigit +# undef towlower +# undef towupper + +/* Linux libc5 has and the functions but they are broken. */ +# if @REPLACE_ISWCNTRL@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define iswalnum rpl_iswalnum +# define iswalpha rpl_iswalpha +# define iswblank rpl_iswblank +# define iswcntrl rpl_iswcntrl +# define iswdigit rpl_iswdigit +# define iswgraph rpl_iswgraph +# define iswlower rpl_iswlower +# define iswprint rpl_iswprint +# define iswpunct rpl_iswpunct +# define iswspace rpl_iswspace +# define iswupper rpl_iswupper +# define iswxdigit rpl_iswxdigit +# endif +# endif +# if @REPLACE_TOWLOWER@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define towlower rpl_towlower +# define towupper rpl_towupper +# endif +# endif + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswalnum +# else +iswalnum +# endif + (wint_t wc) +{ + return ((wc >= '0' && wc <= '9') + || ((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z')); +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswalpha +# else +iswalpha +# endif + (wint_t wc) +{ + return (wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z'; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswblank +# else +iswblank +# endif + (wint_t wc) +{ + return wc == ' ' || wc == '\t'; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswcntrl +# else +iswcntrl +# endif + (wint_t wc) +{ + return (wc & ~0x1f) == 0 || wc == 0x7f; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswdigit +# else +iswdigit +# endif + (wint_t wc) +{ + return wc >= '0' && wc <= '9'; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswgraph +# else +iswgraph +# endif + (wint_t wc) +{ + return wc >= '!' && wc <= '~'; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswlower +# else +iswlower +# endif + (wint_t wc) +{ + return wc >= 'a' && wc <= 'z'; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswprint +# else +iswprint +# endif + (wint_t wc) +{ + return wc >= ' ' && wc <= '~'; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswpunct +# else +iswpunct +# endif + (wint_t wc) +{ + return (wc >= '!' && wc <= '~' + && !((wc >= '0' && wc <= '9') + || ((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z'))); +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswspace +# else +iswspace +# endif + (wint_t wc) +{ + return (wc == ' ' || wc == '\t' + || wc == '\n' || wc == '\v' || wc == '\f' || wc == '\r'); +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswupper +# else +iswupper +# endif + (wint_t wc) +{ + return wc >= 'A' && wc <= 'Z'; +} + +_GL_WCTYPE_INLINE int +# if @REPLACE_ISWCNTRL@ +rpl_iswxdigit +# else +iswxdigit +# endif + (wint_t wc) +{ + return ((wc >= '0' && wc <= '9') + || ((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'F')); +} + +_GL_WCTYPE_INLINE wint_t +# if @REPLACE_TOWLOWER@ +rpl_towlower +# else +towlower +# endif + (wint_t wc) +{ + return (wc >= 'A' && wc <= 'Z' ? wc - 'A' + 'a' : wc); +} + +_GL_WCTYPE_INLINE wint_t +# if @REPLACE_TOWLOWER@ +rpl_towupper +# else +towupper +# endif + (wint_t wc) +{ + return (wc >= 'a' && wc <= 'z' ? wc - 'a' + 'A' : wc); +} + +# elif @GNULIB_ISWBLANK@ && (! @HAVE_ISWBLANK@ || @REPLACE_ISWBLANK@) +/* Only the iswblank function is missing. */ + +# if @REPLACE_ISWBLANK@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define iswblank rpl_iswblank +# endif +_GL_FUNCDECL_RPL (iswblank, int, (wint_t wc)); +# else +_GL_FUNCDECL_SYS (iswblank, int, (wint_t wc)); +# endif + +# endif + +# if defined __MINGW32__ + +/* On native Windows, wchar_t is uint16_t, and wint_t is uint32_t. + The functions towlower and towupper are implemented in the MSVCRT library + to take a wchar_t argument and return a wchar_t result. mingw declares + these functions to take a wint_t argument and return a wint_t result. + This means that: + 1. When the user passes an argument outside the range 0x0000..0xFFFF, the + function will look only at the lower 16 bits. This is allowed according + to POSIX. + 2. The return value is returned in the lower 16 bits of the result register. + The upper 16 bits are random: whatever happened to be in that part of the + result register. We need to fix this by adding a zero-extend from + wchar_t to wint_t after the call. */ + +_GL_WCTYPE_INLINE wint_t +rpl_towlower (wint_t wc) +{ + return (wint_t) (wchar_t) towlower (wc); +} +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define towlower rpl_towlower +# endif + +_GL_WCTYPE_INLINE wint_t +rpl_towupper (wint_t wc) +{ + return (wint_t) (wchar_t) towupper (wc); +} +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define towupper rpl_towupper +# endif + +# endif /* __MINGW32__ */ + +# define GNULIB_defined_wctype_functions 1 +#endif + +#if @REPLACE_ISWCNTRL@ +_GL_CXXALIAS_RPL (iswalnum, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswalpha, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswcntrl, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswdigit, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswgraph, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswlower, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswprint, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswpunct, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswspace, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswupper, int, (wint_t wc)); +_GL_CXXALIAS_RPL (iswxdigit, int, (wint_t wc)); +#else +_GL_CXXALIAS_SYS (iswalnum, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswalpha, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswcntrl, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswdigit, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswgraph, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswlower, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswprint, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswpunct, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswspace, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswupper, int, (wint_t wc)); +_GL_CXXALIAS_SYS (iswxdigit, int, (wint_t wc)); +#endif +_GL_CXXALIASWARN (iswalnum); +_GL_CXXALIASWARN (iswalpha); +_GL_CXXALIASWARN (iswcntrl); +_GL_CXXALIASWARN (iswdigit); +_GL_CXXALIASWARN (iswgraph); +_GL_CXXALIASWARN (iswlower); +_GL_CXXALIASWARN (iswprint); +_GL_CXXALIASWARN (iswpunct); +_GL_CXXALIASWARN (iswspace); +_GL_CXXALIASWARN (iswupper); +_GL_CXXALIASWARN (iswxdigit); + +#if @GNULIB_ISWBLANK@ +# if @REPLACE_ISWCNTRL@ || @REPLACE_ISWBLANK@ +_GL_CXXALIAS_RPL (iswblank, int, (wint_t wc)); +# else +_GL_CXXALIAS_SYS (iswblank, int, (wint_t wc)); +# endif +_GL_CXXALIASWARN (iswblank); +#endif + +#if !@HAVE_WCTYPE_T@ +# if !GNULIB_defined_wctype_t +typedef void * wctype_t; +# define GNULIB_defined_wctype_t 1 +# endif +#endif + +/* Get a descriptor for a wide character property. */ +#if @GNULIB_WCTYPE@ +# if !@HAVE_WCTYPE_T@ +_GL_FUNCDECL_SYS (wctype, wctype_t, (const char *name)); +# endif +_GL_CXXALIAS_SYS (wctype, wctype_t, (const char *name)); +_GL_CXXALIASWARN (wctype); +#elif defined GNULIB_POSIXCHECK +# undef wctype +# if HAVE_RAW_DECL_WCTYPE +_GL_WARN_ON_USE (wctype, "wctype is unportable - " + "use gnulib module wctype for portability"); +# endif +#endif + +/* Test whether a wide character has a given property. + The argument WC must be either a wchar_t value or WEOF. + The argument DESC must have been returned by the wctype() function. */ +#if @GNULIB_ISWCTYPE@ +# if !@HAVE_WCTYPE_T@ +_GL_FUNCDECL_SYS (iswctype, int, (wint_t wc, wctype_t desc)); +# endif +_GL_CXXALIAS_SYS (iswctype, int, (wint_t wc, wctype_t desc)); +_GL_CXXALIASWARN (iswctype); +#elif defined GNULIB_POSIXCHECK +# undef iswctype +# if HAVE_RAW_DECL_ISWCTYPE +_GL_WARN_ON_USE (iswctype, "iswctype is unportable - " + "use gnulib module iswctype for portability"); +# endif +#endif + +#if @REPLACE_TOWLOWER@ || defined __MINGW32__ +_GL_CXXALIAS_RPL (towlower, wint_t, (wint_t wc)); +_GL_CXXALIAS_RPL (towupper, wint_t, (wint_t wc)); +#else +_GL_CXXALIAS_SYS (towlower, wint_t, (wint_t wc)); +_GL_CXXALIAS_SYS (towupper, wint_t, (wint_t wc)); +#endif +_GL_CXXALIASWARN (towlower); +_GL_CXXALIASWARN (towupper); + +#if !@HAVE_WCTRANS_T@ +# if !GNULIB_defined_wctrans_t +typedef void * wctrans_t; +# define GNULIB_defined_wctrans_t 1 +# endif +#endif + +/* Get a descriptor for a wide character case conversion. */ +#if @GNULIB_WCTRANS@ +# if !@HAVE_WCTRANS_T@ +_GL_FUNCDECL_SYS (wctrans, wctrans_t, (const char *name)); +# endif +_GL_CXXALIAS_SYS (wctrans, wctrans_t, (const char *name)); +_GL_CXXALIASWARN (wctrans); +#elif defined GNULIB_POSIXCHECK +# undef wctrans +# if HAVE_RAW_DECL_WCTRANS +_GL_WARN_ON_USE (wctrans, "wctrans is unportable - " + "use gnulib module wctrans for portability"); +# endif +#endif + +/* Perform a given case conversion on a wide character. + The argument WC must be either a wchar_t value or WEOF. + The argument DESC must have been returned by the wctrans() function. */ +#if @GNULIB_TOWCTRANS@ +# if !@HAVE_WCTRANS_T@ +_GL_FUNCDECL_SYS (towctrans, wint_t, (wint_t wc, wctrans_t desc)); +# endif +_GL_CXXALIAS_SYS (towctrans, wint_t, (wint_t wc, wctrans_t desc)); +_GL_CXXALIASWARN (towctrans); +#elif defined GNULIB_POSIXCHECK +# undef towctrans +# if HAVE_RAW_DECL_TOWCTRANS +_GL_WARN_ON_USE (towctrans, "towctrans is unportable - " + "use gnulib module towctrans for portability"); +# endif +#endif + +_GL_INLINE_HEADER_END + +#endif /* _@GUARD_PREFIX@_WCTYPE_H */ +#endif /* _@GUARD_PREFIX@_WCTYPE_H */ diff --git a/lib/xalloc-die.c b/lib/xalloc-die.c new file mode 100644 index 000000000..daa403b9c --- /dev/null +++ b/lib/xalloc-die.c @@ -0,0 +1,41 @@ +/* Report a memory allocation failure and exit. + + Copyright (C) 1997-2000, 2002-2004, 2006, 2009-2013 Free Software + Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "xalloc.h" + +#include + +#include "error.h" +#include "exitfail.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +void +xalloc_die (void) +{ + error (exit_failure, 0, "%s", _("memory exhausted")); + + /* _Noreturn cannot be given to error, since it may return if + its first argument is 0. To help compilers understand the + xalloc_die does not return, call abort. Also, the abort is a + safety feature if exit_failure is 0 (which shouldn't happen). */ + abort (); +} diff --git a/lib/xalloc-oversized.h b/lib/xalloc-oversized.h new file mode 100644 index 000000000..a971c78ad --- /dev/null +++ b/lib/xalloc-oversized.h @@ -0,0 +1,38 @@ +/* xalloc-oversized.h -- memory allocation size checking + + Copyright (C) 1990-2000, 2003-2004, 2006-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef XALLOC_OVERSIZED_H_ +# define XALLOC_OVERSIZED_H_ + +# include + +/* Return 1 if an array of N objects, each of size S, cannot exist due + to size arithmetic overflow. S must be positive and N must be + nonnegative. This is a macro, not a function, so that it + works correctly even when SIZE_MAX < N. + + By gnulib convention, SIZE_MAX represents overflow in size + calculations, so the conservative dividend to use here is + SIZE_MAX - 1, since SIZE_MAX might represent an overflowed value. + However, malloc (SIZE_MAX) fails on all known hosts where + sizeof (ptrdiff_t) <= sizeof (size_t), so do not bother to test for + exactly-SIZE_MAX allocations on such hosts; this avoids a test and + branch when S is known to be 1. */ +# define xalloc_oversized(n, s) \ + ((size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) < (n)) + +#endif /* !XALLOC_OVERSIZED_H_ */ diff --git a/lib/xalloc.h b/lib/xalloc.h new file mode 100644 index 000000000..da7c4b6bb --- /dev/null +++ b/lib/xalloc.h @@ -0,0 +1,260 @@ +/* xalloc.h -- malloc with out-of-memory checking + + Copyright (C) 1990-2000, 2003-2004, 2006-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef XALLOC_H_ +#define XALLOC_H_ + +#include + +#include "xalloc-oversized.h" + +_GL_INLINE_HEADER_BEGIN +#ifndef XALLOC_INLINE +# define XALLOC_INLINE _GL_INLINE +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +#if __GNUC__ >= 3 +# define _GL_ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) +#else +# define _GL_ATTRIBUTE_MALLOC +#endif + +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +# define _GL_ATTRIBUTE_ALLOC_SIZE(args) __attribute__ ((__alloc_size__ args)) +#else +# define _GL_ATTRIBUTE_ALLOC_SIZE(args) +#endif + +/* This function is always triggered when memory is exhausted. + It must be defined by the application, either explicitly + or by using gnulib's xalloc-die module. This is the + function to call when one wants the program to die because of a + memory allocation failure. */ +extern _Noreturn void xalloc_die (void); + +void *xmalloc (size_t s) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1)); +void *xzalloc (size_t s) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1)); +void *xcalloc (size_t n, size_t s) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1, 2)); +void *xrealloc (void *p, size_t s) + _GL_ATTRIBUTE_ALLOC_SIZE ((2)); +void *x2realloc (void *p, size_t *pn); +void *xmemdup (void const *p, size_t s) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((2)); +char *xstrdup (char const *str) + _GL_ATTRIBUTE_MALLOC; + +/* In the following macros, T must be an elementary or structure/union or + typedef'ed type, or a pointer to such a type. To apply one of the + following macros to a function pointer or array type, you need to typedef + it first and use the typedef name. */ + +/* Allocate an object of type T dynamically, with error checking. */ +/* extern t *XMALLOC (typename t); */ +#define XMALLOC(t) ((t *) xmalloc (sizeof (t))) + +/* Allocate memory for N elements of type T, with error checking. */ +/* extern t *XNMALLOC (size_t n, typename t); */ +#define XNMALLOC(n, t) \ + ((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t)))) + +/* Allocate an object of type T dynamically, with error checking, + and zero it. */ +/* extern t *XZALLOC (typename t); */ +#define XZALLOC(t) ((t *) xzalloc (sizeof (t))) + +/* Allocate memory for N elements of type T, with error checking, + and zero it. */ +/* extern t *XCALLOC (size_t n, typename t); */ +#define XCALLOC(n, t) \ + ((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t)))) + + +/* Allocate an array of N objects, each with S bytes of memory, + dynamically, with error checking. S must be nonzero. */ + +XALLOC_INLINE void *xnmalloc (size_t n, size_t s) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1, 2)); +XALLOC_INLINE void * +xnmalloc (size_t n, size_t s) +{ + if (xalloc_oversized (n, s)) + xalloc_die (); + return xmalloc (n * s); +} + +/* Change the size of an allocated block of memory P to an array of N + objects each of S bytes, with error checking. S must be nonzero. */ + +XALLOC_INLINE void *xnrealloc (void *p, size_t n, size_t s) + _GL_ATTRIBUTE_ALLOC_SIZE ((2, 3)); +XALLOC_INLINE void * +xnrealloc (void *p, size_t n, size_t s) +{ + if (xalloc_oversized (n, s)) + xalloc_die (); + return xrealloc (p, n * s); +} + +/* If P is null, allocate a block of at least *PN such objects; + otherwise, reallocate P so that it contains more than *PN objects + each of S bytes. *PN must be nonzero unless P is null, and S must + be nonzero. Set *PN to the new number of objects, and return the + pointer to the new block. *PN is never set to zero, and the + returned pointer is never null. + + Repeated reallocations are guaranteed to make progress, either by + allocating an initial block with a nonzero size, or by allocating a + larger block. + + In the following implementation, nonzero sizes are increased by a + factor of approximately 1.5 so that repeated reallocations have + O(N) overall cost rather than O(N**2) cost, but the + specification for this function does not guarantee that rate. + + Here is an example of use: + + int *p = NULL; + size_t used = 0; + size_t allocated = 0; + + void + append_int (int value) + { + if (used == allocated) + p = x2nrealloc (p, &allocated, sizeof *p); + p[used++] = value; + } + + This causes x2nrealloc to allocate a block of some nonzero size the + first time it is called. + + To have finer-grained control over the initial size, set *PN to a + nonzero value before calling this function with P == NULL. For + example: + + int *p = NULL; + size_t used = 0; + size_t allocated = 0; + size_t allocated1 = 1000; + + void + append_int (int value) + { + if (used == allocated) + { + p = x2nrealloc (p, &allocated1, sizeof *p); + allocated = allocated1; + } + p[used++] = value; + } + + */ + +XALLOC_INLINE void * +x2nrealloc (void *p, size_t *pn, size_t s) +{ + size_t n = *pn; + + if (! p) + { + if (! n) + { + /* The approximate size to use for initial small allocation + requests, when the invoking code specifies an old size of + zero. This is the largest "small" request for the GNU C + library malloc. */ + enum { DEFAULT_MXFAST = 64 * sizeof (size_t) / 4 }; + + n = DEFAULT_MXFAST / s; + n += !n; + } + } + else + { + /* Set N = ceil (1.5 * N) so that progress is made if N == 1. + Check for overflow, so that N * S stays in size_t range. + The check is slightly conservative, but an exact check isn't + worth the trouble. */ + if ((size_t) -1 / 3 * 2 / s <= n) + xalloc_die (); + n += (n + 1) / 2; + } + + *pn = n; + return xrealloc (p, n * s); +} + +/* Return a pointer to a new buffer of N bytes. This is like xmalloc, + except it returns char *. */ + +XALLOC_INLINE char *xcharalloc (size_t n) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_ALLOC_SIZE ((1)); +XALLOC_INLINE char * +xcharalloc (size_t n) +{ + return XNMALLOC (n, char); +} + +#ifdef __cplusplus +} + +/* C++ does not allow conversions from void * to other pointer types + without a cast. Use templates to work around the problem when + possible. */ + +template inline T * +xrealloc (T *p, size_t s) +{ + return (T *) xrealloc ((void *) p, s); +} + +template inline T * +xnrealloc (T *p, size_t n, size_t s) +{ + return (T *) xnrealloc ((void *) p, n, s); +} + +template inline T * +x2realloc (T *p, size_t *pn) +{ + return (T *) x2realloc ((void *) p, pn); +} + +template inline T * +x2nrealloc (T *p, size_t *pn, size_t s) +{ + return (T *) x2nrealloc ((void *) p, pn, s); +} + +template inline T * +xmemdup (T const *p, size_t s) +{ + return (T *) xmemdup ((void const *) p, s); +} + +#endif + + +#endif /* !XALLOC_H_ */ diff --git a/lib/xmalloc.c b/lib/xmalloc.c new file mode 100644 index 000000000..57e34b7cd --- /dev/null +++ b/lib/xmalloc.c @@ -0,0 +1,122 @@ +/* xmalloc.c -- malloc with out of memory checking + + Copyright (C) 1990-2000, 2002-2006, 2008-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#define XALLOC_INLINE _GL_EXTERN_INLINE + +#include "xalloc.h" + +#include +#include + +/* 1 if calloc is known to be compatible with GNU calloc. This + matters if we are not also using the calloc module, which defines + HAVE_CALLOC_GNU and supports the GNU API even on non-GNU platforms. */ +#if defined HAVE_CALLOC_GNU || (defined __GLIBC__ && !defined __UCLIBC__) +enum { HAVE_GNU_CALLOC = 1 }; +#else +enum { HAVE_GNU_CALLOC = 0 }; +#endif + +/* Allocate N bytes of memory dynamically, with error checking. */ + +void * +xmalloc (size_t n) +{ + void *p = malloc (n); + if (!p && n != 0) + xalloc_die (); + return p; +} + +/* Change the size of an allocated block of memory P to N bytes, + with error checking. */ + +void * +xrealloc (void *p, size_t n) +{ + if (!n && p) + { + /* The GNU and C99 realloc behaviors disagree here. Act like + GNU, even if the underlying realloc is C99. */ + free (p); + return NULL; + } + + p = realloc (p, n); + if (!p && n) + xalloc_die (); + return p; +} + +/* If P is null, allocate a block of at least *PN bytes; otherwise, + reallocate P so that it contains more than *PN bytes. *PN must be + nonzero unless P is null. Set *PN to the new block's size, and + return the pointer to the new block. *PN is never set to zero, and + the returned pointer is never null. */ + +void * +x2realloc (void *p, size_t *pn) +{ + return x2nrealloc (p, pn, 1); +} + +/* Allocate S bytes of zeroed memory dynamically, with error checking. + There's no need for xnzalloc (N, S), since it would be equivalent + to xcalloc (N, S). */ + +void * +xzalloc (size_t s) +{ + return memset (xmalloc (s), 0, s); +} + +/* Allocate zeroed memory for N elements of S bytes, with error + checking. S must be nonzero. */ + +void * +xcalloc (size_t n, size_t s) +{ + void *p; + /* Test for overflow, since some calloc implementations don't have + proper overflow checks. But omit overflow and size-zero tests if + HAVE_GNU_CALLOC, since GNU calloc catches overflow and never + returns NULL if successful. */ + if ((! HAVE_GNU_CALLOC && xalloc_oversized (n, s)) + || (! (p = calloc (n, s)) && (HAVE_GNU_CALLOC || n != 0))) + xalloc_die (); + return p; +} + +/* Clone an object P of size S, with error checking. There's no need + for xnmemdup (P, N, S), since xmemdup (P, N * S) works without any + need for an arithmetic overflow check. */ + +void * +xmemdup (void const *p, size_t s) +{ + return memcpy (xmalloc (s), p, s); +} + +/* Clone STRING. */ + +char * +xstrdup (char const *string) +{ + return xmemdup (string, strlen (string) + 1); +} diff --git a/m4/codeset.m4 b/m4/codeset.m4 new file mode 100644 index 000000000..c2761be2a --- /dev/null +++ b/m4/codeset.m4 @@ -0,0 +1,23 @@ +# codeset.m4 serial 5 (gettext-0.18.2) +dnl Copyright (C) 2000-2002, 2006, 2008-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +AC_DEFUN([AM_LANGINFO_CODESET], +[ + AC_CACHE_CHECK([for nl_langinfo and CODESET], [am_cv_langinfo_codeset], + [AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[char* cs = nl_langinfo(CODESET); return !cs;]])], + [am_cv_langinfo_codeset=yes], + [am_cv_langinfo_codeset=no]) + ]) + if test $am_cv_langinfo_codeset = yes; then + AC_DEFINE([HAVE_LANGINFO_CODESET], [1], + [Define if you have and nl_langinfo(CODESET).]) + fi +]) diff --git a/m4/configmake.m4 b/m4/configmake.m4 new file mode 100644 index 000000000..823ffc0dd --- /dev/null +++ b/m4/configmake.m4 @@ -0,0 +1,50 @@ +# configmake.m4 serial 1 +dnl Copyright (C) 2010-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +# gl_CONFIGMAKE_PREP +# ------------------ +# Guarantee all of the standard directory variables, even when used with +# autoconf 2.59 (datarootdir wasn't supported until 2.59c) or automake +# 1.9.6 (pkglibexecdir wasn't supported until 1.10b.). +AC_DEFUN([gl_CONFIGMAKE_PREP], +[ + dnl Technically, datadir should default to datarootdir. But if + dnl autoconf is too old to provide datarootdir, then reversing the + dnl definition is a reasonable compromise. Only AC_SUBST a variable + dnl if it was not already defined earlier by autoconf. + if test "x$datarootdir" = x; then + AC_SUBST([datarootdir], ['${datadir}']) + fi + dnl Copy the approach used in autoconf 2.60. + if test "x$docdir" = x; then + AC_SUBST([docdir], [m4_ifset([AC_PACKAGE_TARNAME], + ['${datarootdir}/doc/${PACKAGE_TARNAME}'], + ['${datarootdir}/doc/${PACKAGE}'])]) + fi + dnl The remaining variables missing from autoconf 2.59 are easier. + if test "x$htmldir" = x; then + AC_SUBST([htmldir], ['${docdir}']) + fi + if test "x$dvidir" = x; then + AC_SUBST([dvidir], ['${docdir}']) + fi + if test "x$pdfdir" = x; then + AC_SUBST([pdfdir], ['${docdir}']) + fi + if test "x$psdir" = x; then + AC_SUBST([psdir], ['${docdir}']) + fi + if test "x$lispdir" = x; then + AC_SUBST([lispdir], ['${datarootdir}/emacs/site-lisp']) + fi + if test "x$localedir" = x; then + AC_SUBST([localedir], ['${datarootdir}/locale']) + fi + + dnl Automake 1.9.6 only lacks pkglibexecdir; and since 1.11 merely + dnl provides it without AC_SUBST, this blind use of AC_SUBST is safe. + AC_SUBST([pkglibexecdir], ['${libexecdir}/${PACKAGE}']) +]) diff --git a/m4/glibc21.m4 b/m4/glibc21.m4 new file mode 100644 index 000000000..613fb2a41 --- /dev/null +++ b/m4/glibc21.m4 @@ -0,0 +1,34 @@ +# glibc21.m4 serial 5 +dnl Copyright (C) 2000-2002, 2004, 2008, 2010-2013 Free Software Foundation, +dnl Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +# Test for the GNU C Library, version 2.1 or newer, or uClibc. +# From Bruno Haible. + +AC_DEFUN([gl_GLIBC21], + [ + AC_CACHE_CHECK([whether we are using the GNU C Library >= 2.1 or uClibc], + [ac_cv_gnu_library_2_1], + [AC_EGREP_CPP([Lucky], + [ +#include +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1) || (__GLIBC__ > 2) + Lucky GNU user + #endif +#endif +#ifdef __UCLIBC__ + Lucky user +#endif + ], + [ac_cv_gnu_library_2_1=yes], + [ac_cv_gnu_library_2_1=no]) + ] + ) + AC_SUBST([GLIBC21]) + GLIBC21="$ac_cv_gnu_library_2_1" + ] +) diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index 2c575c9f9..5af1a6e2c 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -27,14 +27,16 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=tools --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files argp error gethrxtime mkstemp progname sys_wait +# gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=tools --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files argmatch argp error gethrxtime isatty mkstemp progname sys_wait # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([]) gl_MODULES([ + argmatch argp error gethrxtime + isatty mkstemp progname sys_wait diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4 index 3c3426acb..c9cb69c46 100644 --- a/m4/gnulib-comp.m4 +++ b/m4/gnulib-comp.m4 @@ -40,13 +40,19 @@ AC_DEFUN([gl_EARLY], AC_REQUIRE([gl_PROG_AR_RANLIB]) # Code from module alloca: # Code from module alloca-opt: + # Code from module argmatch: # Code from module argp: + # Code from module c-ctype: + # Code from module c-strcase: + # Code from module c-strcaseeq: # Code from module clock-time: + # Code from module configmake: # Code from module dirname-lgpl: # Code from module dosname: # Code from module double-slash-root: # Code from module errno: # Code from module error: + # Code from module exitfail: # Code from module extensions: AC_REQUIRE([gl_USE_SYSTEM_EXTENSIONS]) # Code from module extern-inline: @@ -60,11 +66,15 @@ AC_DEFUN([gl_EARLY], # Code from module gettimeofday: # Code from module include_next: # Code from module intprops: + # Code from module isatty: # Code from module largefile: AC_REQUIRE([AC_SYS_LARGEFILE]) + # Code from module localcharset: # Code from module lstat: # Code from module malloc-gnu: # Code from module malloc-posix: + # Code from module mbrtowc: + # Code from module mbsinit: # Code from module memchr: # Code from module mempcpy: # Code from module mkstemp: @@ -74,6 +84,9 @@ AC_DEFUN([gl_EARLY], # Code from module nocrash: # Code from module pathmax: # Code from module progname: + # Code from module quote: + # Code from module quotearg: + # Code from module quotearg-simple: # Code from module rawmemchr: # Code from module secure_getenv: # Code from module size_max: @@ -92,6 +105,7 @@ AC_DEFUN([gl_EARLY], # Code from module stdlib: # Code from module strcase: # Code from module strchrnul: + # Code from module streq: # Code from module strerror: # Code from module strerror-override: # Code from module string: @@ -111,6 +125,10 @@ AC_DEFUN([gl_EARLY], # Code from module verify: # Code from module vsnprintf: # Code from module wchar: + # Code from module wctype-h: + # Code from module xalloc: + # Code from module xalloc-die: + # Code from module xalloc-oversized: # Code from module xsize: ]) @@ -138,6 +156,7 @@ AC_SUBST([LTALLOCA]) [AM_][XGETTEXT_OPTION([--flag=argp_error:2:c-format]) AM_][XGETTEXT_OPTION([--flag=argp_failure:4:c-format])]) gl_CLOCK_TIME + gl_CONFIGMAKE_PREP gl_DIRNAME_LGPL gl_DOUBLE_SLASH_ROOT gl_HEADER_ERRNO_H @@ -187,7 +206,16 @@ AC_SUBST([LTALLOCA]) gl_PREREQ_GETTIMEOFDAY fi gl_SYS_TIME_MODULE_INDICATOR([gettimeofday]) + gl_FUNC_ISATTY + if test $REPLACE_ISATTY = 1; then + AC_LIBOBJ([isatty]) + gl_PREREQ_ISATTY + fi + gl_UNISTD_MODULE_INDICATOR([isatty]) AC_REQUIRE([gl_LARGEFILE]) + gl_LOCALCHARSET + LOCALCHARSET_TESTS_ENVIRONMENT="CHARSETALIASDIR=\"\$(abs_top_builddir)/$gl_source_base\"" + AC_SUBST([LOCALCHARSET_TESTS_ENVIRONMENT]) gl_FUNC_LSTAT if test $REPLACE_LSTAT = 1; then AC_LIBOBJ([lstat]) @@ -204,6 +232,18 @@ AC_SUBST([LTALLOCA]) AC_LIBOBJ([malloc]) fi gl_STDLIB_MODULE_INDICATOR([malloc-posix]) + gl_FUNC_MBRTOWC + if test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1; then + AC_LIBOBJ([mbrtowc]) + gl_PREREQ_MBRTOWC + fi + gl_WCHAR_MODULE_INDICATOR([mbrtowc]) + gl_FUNC_MBSINIT + if test $HAVE_MBSINIT = 0 || test $REPLACE_MBSINIT = 1; then + AC_LIBOBJ([mbsinit]) + gl_PREREQ_MBSINIT + fi + gl_WCHAR_MODULE_INDICATOR([mbsinit]) gl_FUNC_MEMCHR if test $HAVE_MEMCHR = 0 || test $REPLACE_MEMCHR = 1; then AC_LIBOBJ([memchr]) @@ -234,6 +274,8 @@ AC_SUBST([LTALLOCA]) gl_PATHMAX AC_CHECK_DECLS([program_invocation_name], [], [], [#include ]) AC_CHECK_DECLS([program_invocation_short_name], [], [], [#include ]) + gl_QUOTE + gl_QUOTEARG gl_FUNC_RAWMEMCHR if test $HAVE_RAWMEMCHR = 0; then AC_LIBOBJ([rawmemchr]) @@ -322,6 +364,8 @@ AC_SUBST([LTALLOCA]) gl_FUNC_VSNPRINTF gl_STDIO_MODULE_INDICATOR([vsnprintf]) gl_WCHAR_H + gl_WCTYPE_H + gl_XALLOC gl_XSIZE # End of code from modules m4_ifval(gl_LIBSOURCES_LIST, [ @@ -465,6 +509,8 @@ AC_DEFUN([gl_FILE_LIST], [ build-aux/snippet/warn-on-use.h lib/alloca.c lib/alloca.in.h + lib/argmatch.c + lib/argmatch.h lib/argp-ba.c lib/argp-eexst.c lib/argp-fmtstream.c @@ -480,12 +526,21 @@ AC_DEFUN([gl_FILE_LIST], [ lib/argp.h lib/asnprintf.c lib/basename-lgpl.c + lib/c-ctype.c + lib/c-ctype.h + lib/c-strcase.h + lib/c-strcasecmp.c + lib/c-strcaseeq.h + lib/c-strncasecmp.c + lib/config.charset lib/dirname-lgpl.c lib/dirname.h lib/dosname.h lib/errno.in.h lib/error.c lib/error.h + lib/exitfail.c + lib/exitfail.h lib/fcntl.in.h lib/float+.h lib/float.c @@ -500,9 +555,14 @@ AC_DEFUN([gl_FILE_LIST], [ lib/gettime.c lib/gettimeofday.c lib/intprops.h + lib/isatty.c lib/itold.c + lib/localcharset.c + lib/localcharset.h lib/lstat.c lib/malloc.c + lib/mbrtowc.c + lib/mbsinit.c lib/memchr.c lib/memchr.valgrind lib/mempcpy.c @@ -518,8 +578,13 @@ AC_DEFUN([gl_FILE_LIST], [ lib/printf-parse.h lib/progname.c lib/progname.h + lib/quote.h + lib/quotearg.c + lib/quotearg.h lib/rawmemchr.c lib/rawmemchr.valgrind + lib/ref-add.sin + lib/ref-del.sin lib/secure_getenv.c lib/size_max.h lib/sleep.c @@ -533,6 +598,7 @@ AC_DEFUN([gl_FILE_LIST], [ lib/strcasecmp.c lib/strchrnul.c lib/strchrnul.valgrind + lib/streq.h lib/strerror-override.c lib/strerror-override.h lib/strerror.c @@ -559,6 +625,12 @@ AC_DEFUN([gl_FILE_LIST], [ lib/verify.h lib/vsnprintf.c lib/wchar.in.h + lib/wctype-h.c + lib/wctype.in.h + lib/xalloc-die.c + lib/xalloc-oversized.h + lib/xalloc.h + lib/xmalloc.c lib/xsize.c lib/xsize.h lib/xtime.c @@ -567,6 +639,8 @@ AC_DEFUN([gl_FILE_LIST], [ m4/alloca.m4 m4/argp.m4 m4/clock_time.m4 + m4/codeset.m4 + m4/configmake.m4 m4/dirname.m4 m4/double-slash-root.m4 m4/errno_h.m4 @@ -581,15 +655,24 @@ AC_DEFUN([gl_FILE_LIST], [ m4/getopt.m4 m4/gettime.m4 m4/gettimeofday.m4 + m4/glibc21.m4 m4/gnulib-common.m4 m4/include_next.m4 m4/intmax_t.m4 m4/inttypes_h.m4 + m4/isatty.m4 m4/largefile.m4 + m4/localcharset.m4 + m4/locale-fr.m4 + m4/locale-ja.m4 + m4/locale-zh.m4 m4/longlong.m4 m4/lstat.m4 m4/malloc.m4 m4/math_h.m4 + m4/mbrtowc.m4 + m4/mbsinit.m4 + m4/mbstate_t.m4 m4/memchr.m4 m4/mempcpy.m4 m4/mkstemp.m4 @@ -601,6 +684,8 @@ AC_DEFUN([gl_FILE_LIST], [ m4/off_t.m4 m4/pathmax.m4 m4/printf.m4 + m4/quote.m4 + m4/quotearg.m4 m4/rawmemchr.m4 m4/secure_getenv.m4 m4/size_max.m4 @@ -636,6 +721,8 @@ AC_DEFUN([gl_FILE_LIST], [ m4/warn-on-use.m4 m4/wchar_h.m4 m4/wchar_t.m4 + m4/wctype_h.m4 m4/wint_t.m4 + m4/xalloc.m4 m4/xsize.m4 ]) diff --git a/m4/isatty.m4 b/m4/isatty.m4 new file mode 100644 index 000000000..cac823116 --- /dev/null +++ b/m4/isatty.m4 @@ -0,0 +1,19 @@ +# isatty.m4 serial 3 +dnl Copyright (C) 2012-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_ISATTY], +[ + AC_REQUIRE([gl_UNISTD_H_DEFAULTS]) + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + dnl On native Windows, the system's isatty(), defined as an alias of _isatty() + dnl in the "oldnames" library, returns true for the NUL device. + case $host_os in + mingw*) REPLACE_ISATTY=1 ;; + esac +]) + +# Prerequisites of lib/isatty.c. +AC_DEFUN([gl_PREREQ_ISATTY], [:]) diff --git a/m4/localcharset.m4 b/m4/localcharset.m4 new file mode 100644 index 000000000..2e93e5818 --- /dev/null +++ b/m4/localcharset.m4 @@ -0,0 +1,17 @@ +# localcharset.m4 serial 7 +dnl Copyright (C) 2002, 2004, 2006, 2009-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_LOCALCHARSET], +[ + dnl Prerequisites of lib/localcharset.c. + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_REQUIRE([gl_FCNTL_O_FLAGS]) + AC_CHECK_DECLS_ONCE([getc_unlocked]) + + dnl Prerequisites of the lib/Makefile.am snippet. + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([gl_GLIBC21]) +]) diff --git a/m4/locale-fr.m4 b/m4/locale-fr.m4 new file mode 100644 index 000000000..ef199e397 --- /dev/null +++ b/m4/locale-fr.m4 @@ -0,0 +1,250 @@ +# locale-fr.m4 serial 17 +dnl Copyright (C) 2003, 2005-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Determine the name of a french locale with traditional encoding. +AC_DEFUN([gt_LOCALE_FR], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a traditional french locale], [gt_cv_locale_fr], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include +#include +#if HAVE_LANGINFO_CODESET +# include +#endif +#include +#include +struct tm t; +char buf[16]; +int main () { + /* Check whether the given locale name is recognized by the system. */ +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +#else + if (setlocale (LC_ALL, "") == NULL) return 1; +#endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. + On MirBSD 10, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "UTF-8". */ +#if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0 + || strcmp (cs, "UTF-8") == 0) + return 1; + } +#endif +#ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +#endif + /* Check whether in the abbreviation of the second month, the second + character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is only + one byte long. This excludes the UTF-8 encoding. */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%b", &t) < 3 || buf[2] != 'v') return 1; +#if !defined __BIONIC__ /* Bionic libc's 'struct lconv' is just a dummy. */ + /* Check whether the decimal separator is a comma. + On NetBSD 3.0 in the fr_FR.ISO8859-1 locale, localeconv()->decimal_point + are nl_langinfo(RADIXCHAR) are both ".". */ + if (localeconv () ->decimal_point[0] != ',') return 1; +#endif + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Test for the native Windows locale name. + if (LC_ALL=French_France.1252 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=French_France.1252 + else + # None found. + gt_cv_locale_fr=none + fi + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the usual locale name. + if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=fr_FR.ISO-8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR.ISO-8859-1 + else + # Test for the AIX, OSF/1, FreeBSD, NetBSD, OpenBSD locale name. + if (LC_ALL=fr_FR.ISO8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR.ISO8859-1 + else + # Test for the HP-UX locale name. + if (LC_ALL=fr_FR.iso88591 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR.iso88591 + else + # Test for the Solaris 7 locale name. + if (LC_ALL=fr LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr + else + # None found. + gt_cv_locale_fr=none + fi + fi + fi + fi + fi + ;; + esac + fi + rm -fr conftest* + ]) + LOCALE_FR=$gt_cv_locale_fr + AC_SUBST([LOCALE_FR]) +]) + +dnl Determine the name of a french locale with UTF-8 encoding. +AC_DEFUN([gt_LOCALE_FR_UTF8], +[ + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a french Unicode locale], [gt_cv_locale_fr_utf8], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include +#include +#if HAVE_LANGINFO_CODESET +# include +#endif +#include +#include +struct tm t; +char buf[16]; +int main () { + /* On BeOS and Haiku, locales are not implemented in libc. Rather, libintl + imitates locale dependent behaviour by looking at the environment + variables, and all locales use the UTF-8 encoding. */ +#if !(defined __BEOS__ || defined __HAIKU__) + /* Check whether the given locale name is recognized by the system. */ +# if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +# else + if (setlocale (LC_ALL, "") == NULL) return 1; +# endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. */ +# if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0) + return 1; + } +# endif +# ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +# endif + /* Check whether in the abbreviation of the second month, the second + character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is + two bytes long, with UTF-8 encoding. */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%b", &t) < 4 + || buf[1] != (char) 0xc3 || buf[2] != (char) 0xa9 || buf[3] != 'v') + return 1; +#endif +#if !defined __BIONIC__ /* Bionic libc's 'struct lconv' is just a dummy. */ + /* Check whether the decimal separator is a comma. + On NetBSD 3.0 in the fr_FR.ISO8859-1 locale, localeconv()->decimal_point + are nl_langinfo(RADIXCHAR) are both ".". */ + if (localeconv () ->decimal_point[0] != ',') return 1; +#endif + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Test for the hypothetical native Windows locale name. + if (LC_ALL=French_France.65001 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=French_France.65001 + else + # None found. + gt_cv_locale_fr_utf8=none + fi + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the usual locale name. + if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=fr_FR + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=fr_FR.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=fr_FR.UTF-8 + else + # Test for the Solaris 7 locale name. + if (LC_ALL=fr.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=fr.UTF-8 + else + # None found. + gt_cv_locale_fr_utf8=none + fi + fi + fi + ;; + esac + fi + rm -fr conftest* + ]) + LOCALE_FR_UTF8=$gt_cv_locale_fr_utf8 + AC_SUBST([LOCALE_FR_UTF8]) +]) diff --git a/m4/locale-ja.m4 b/m4/locale-ja.m4 new file mode 100644 index 000000000..132a3e779 --- /dev/null +++ b/m4/locale-ja.m4 @@ -0,0 +1,136 @@ +# locale-ja.m4 serial 12 +dnl Copyright (C) 2003, 2005-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Determine the name of a japanese locale with EUC-JP encoding. +AC_DEFUN([gt_LOCALE_JA], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a traditional japanese locale], [gt_cv_locale_ja], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include +#include +#if HAVE_LANGINFO_CODESET +# include +#endif +#include +#include +struct tm t; +char buf[16]; +int main () +{ + const char *p; + /* Check whether the given locale name is recognized by the system. */ +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +#else + if (setlocale (LC_ALL, "") == NULL) return 1; +#endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. + On MirBSD 10, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "UTF-8". */ +#if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0 + || strcmp (cs, "UTF-8") == 0) + return 1; + } +#endif +#ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +#endif + /* Check whether MB_CUR_MAX is > 1. This excludes the dysfunctional locales + on Cygwin 1.5.x. */ + if (MB_CUR_MAX == 1) + return 1; + /* Check whether in a month name, no byte in the range 0x80..0x9F occurs. + This excludes the UTF-8 encoding (except on MirBSD). */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%B", &t) < 2) return 1; + for (p = buf; *p != '\0'; p++) + if ((unsigned char) *p >= 0x80 && (unsigned char) *p < 0xa0) + return 1; + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Note that on native Windows, the Japanese locale is + # Japanese_Japan.932, and CP932 is very different from EUC-JP, so we + # cannot use it here. + gt_cv_locale_ja=none + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the AIX locale name. + if (LC_ALL=ja_JP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=ja_JP.EUC-JP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP.EUC-JP + else + # Test for the HP-UX, OSF/1, NetBSD locale name. + if (LC_ALL=ja_JP.eucJP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP.eucJP + else + # Test for the IRIX, FreeBSD locale name. + if (LC_ALL=ja_JP.EUC LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP.EUC + else + # Test for the Solaris 7 locale name. + if (LC_ALL=ja LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja + else + # Special test for NetBSD 1.6. + if test -f /usr/share/locale/ja_JP.eucJP/LC_CTYPE; then + gt_cv_locale_ja=ja_JP.eucJP + else + # None found. + gt_cv_locale_ja=none + fi + fi + fi + fi + fi + fi + ;; + esac + fi + rm -fr conftest* + ]) + LOCALE_JA=$gt_cv_locale_ja + AC_SUBST([LOCALE_JA]) +]) diff --git a/m4/locale-zh.m4 b/m4/locale-zh.m4 new file mode 100644 index 000000000..4eed73f40 --- /dev/null +++ b/m4/locale-zh.m4 @@ -0,0 +1,130 @@ +# locale-zh.m4 serial 12 +dnl Copyright (C) 2003, 2005-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Determine the name of a chinese locale with GB18030 encoding. +AC_DEFUN([gt_LOCALE_ZH_CN], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a transitional chinese locale], [gt_cv_locale_zh_CN], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include +#include +#include +#if HAVE_LANGINFO_CODESET +# include +#endif +#include +#include +struct tm t; +char buf[16]; +int main () +{ + const char *p; + /* Check whether the given locale name is recognized by the system. */ +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +#else + if (setlocale (LC_ALL, "") == NULL) return 1; +#endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. + On MirBSD 10, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "UTF-8". */ +#if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0 + || strcmp (cs, "UTF-8") == 0) + return 1; + } +#endif +#ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +#endif + /* Check whether in a month name, no byte in the range 0x80..0x9F occurs. + This excludes the UTF-8 encoding (except on MirBSD). */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%B", &t) < 2) return 1; + for (p = buf; *p != '\0'; p++) + if ((unsigned char) *p >= 0x80 && (unsigned char) *p < 0xa0) + return 1; + /* Check whether a typical GB18030 multibyte sequence is recognized as a + single wide character. This excludes the GB2312 and GBK encodings. */ + if (mblen ("\203\062\332\066", 5) != 4) + return 1; + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Test for the hypothetical native Windows locale name. + if (LC_ALL=Chinese_China.54936 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_zh_CN=Chinese_China.54936 + else + # None found. + gt_cv_locale_zh_CN=none + fi + ;; + solaris2.8) + # On Solaris 8, the locales zh_CN.GB18030, zh_CN.GBK, zh.GBK are + # broken. One witness is the test case in gl_MBRTOWC_SANITYCHECK. + # Another witness is that "LC_ALL=zh_CN.GB18030 bash -c true" dumps core. + gt_cv_locale_zh_CN=none + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the locale name without encoding suffix. + if (LC_ALL=zh_CN LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_zh_CN=zh_CN + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=zh_CN.GB18030 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_zh_CN=zh_CN.GB18030 + else + # None found. + gt_cv_locale_zh_CN=none + fi + fi + ;; + esac + else + # If there was a link error, due to mblen(), the system is so old that + # it certainly doesn't have a chinese locale. + gt_cv_locale_zh_CN=none + fi + rm -fr conftest* + ]) + LOCALE_ZH_CN=$gt_cv_locale_zh_CN + AC_SUBST([LOCALE_ZH_CN]) +]) diff --git a/m4/mbrtowc.m4 b/m4/mbrtowc.m4 new file mode 100644 index 000000000..4c9f38861 --- /dev/null +++ b/m4/mbrtowc.m4 @@ -0,0 +1,572 @@ +# mbrtowc.m4 serial 25 +dnl Copyright (C) 2001-2002, 2004-2005, 2008-2013 Free Software Foundation, +dnl Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_MBRTOWC], +[ + AC_REQUIRE([gl_WCHAR_H_DEFAULTS]) + + AC_REQUIRE([AC_TYPE_MBSTATE_T]) + gl_MBSTATE_T_BROKEN + + AC_CHECK_FUNCS_ONCE([mbrtowc]) + if test $ac_cv_func_mbrtowc = no; then + HAVE_MBRTOWC=0 + AC_CHECK_DECLS([mbrtowc],,, [[ +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +]]) + if test $ac_cv_have_decl_mbrtowc = yes; then + dnl On Minix 3.1.8, the system's declares mbrtowc() although + dnl it does not have the function. Avoid a collision with gnulib's + dnl replacement. + REPLACE_MBRTOWC=1 + fi + else + if test $REPLACE_MBSTATE_T = 1; then + REPLACE_MBRTOWC=1 + else + gl_MBRTOWC_NULL_ARG1 + gl_MBRTOWC_NULL_ARG2 + gl_MBRTOWC_RETVAL + gl_MBRTOWC_NUL_RETVAL + case "$gl_cv_func_mbrtowc_null_arg1" in + *yes) ;; + *) AC_DEFINE([MBRTOWC_NULL_ARG1_BUG], [1], + [Define if the mbrtowc function has the NULL pwc argument bug.]) + REPLACE_MBRTOWC=1 + ;; + esac + case "$gl_cv_func_mbrtowc_null_arg2" in + *yes) ;; + *) AC_DEFINE([MBRTOWC_NULL_ARG2_BUG], [1], + [Define if the mbrtowc function has the NULL string argument bug.]) + REPLACE_MBRTOWC=1 + ;; + esac + case "$gl_cv_func_mbrtowc_retval" in + *yes) ;; + *) AC_DEFINE([MBRTOWC_RETVAL_BUG], [1], + [Define if the mbrtowc function returns a wrong return value.]) + REPLACE_MBRTOWC=1 + ;; + esac + case "$gl_cv_func_mbrtowc_nul_retval" in + *yes) ;; + *) AC_DEFINE([MBRTOWC_NUL_RETVAL_BUG], [1], + [Define if the mbrtowc function does not return 0 for a NUL character.]) + REPLACE_MBRTOWC=1 + ;; + esac + fi + fi +]) + +dnl Test whether mbsinit() and mbrtowc() need to be overridden in a way that +dnl redefines the semantics of the given mbstate_t type. +dnl Result is REPLACE_MBSTATE_T. +dnl When this is set to 1, we replace both mbsinit() and mbrtowc(), in order to +dnl avoid inconsistencies. + +AC_DEFUN([gl_MBSTATE_T_BROKEN], +[ + AC_REQUIRE([gl_WCHAR_H_DEFAULTS]) + + AC_REQUIRE([AC_TYPE_MBSTATE_T]) + AC_CHECK_FUNCS_ONCE([mbsinit]) + AC_CHECK_FUNCS_ONCE([mbrtowc]) + if test $ac_cv_func_mbsinit = yes && test $ac_cv_func_mbrtowc = yes; then + gl_MBRTOWC_INCOMPLETE_STATE + gl_MBRTOWC_SANITYCHECK + REPLACE_MBSTATE_T=0 + case "$gl_cv_func_mbrtowc_incomplete_state" in + *yes) ;; + *) REPLACE_MBSTATE_T=1 ;; + esac + case "$gl_cv_func_mbrtowc_sanitycheck" in + *yes) ;; + *) REPLACE_MBSTATE_T=1 ;; + esac + else + REPLACE_MBSTATE_T=1 + fi +]) + +dnl Test whether mbrtowc puts the state into non-initial state when parsing an +dnl incomplete multibyte character. +dnl Result is gl_cv_func_mbrtowc_incomplete_state. + +AC_DEFUN([gl_MBRTOWC_INCOMPLETE_STATE], +[ + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([gt_LOCALE_JA]) + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + AC_CACHE_CHECK([whether mbrtowc handles incomplete characters], + [gl_cv_func_mbrtowc_incomplete_state], + [ + dnl Initial guess, used when cross-compiling or when no suitable locale + dnl is present. +changequote(,)dnl + case "$host_os" in + # Guess no on AIX and OSF/1. + aix* | osf*) gl_cv_func_mbrtowc_incomplete_state="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_mbrtowc_incomplete_state="guessing yes" ;; + esac +changequote([,])dnl + if test $LOCALE_JA != none; then + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include +#include +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +int main () +{ + if (setlocale (LC_ALL, "$LOCALE_JA") != NULL) + { + const char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */ + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, input + 1, 1, &state) == (size_t)(-2)) + if (mbsinit (&state)) + return 1; + } + return 0; +}]])], + [gl_cv_func_mbrtowc_incomplete_state=yes], + [gl_cv_func_mbrtowc_incomplete_state=no], + [:]) + fi + ]) +]) + +dnl Test whether mbrtowc works not worse than mbtowc. +dnl Result is gl_cv_func_mbrtowc_sanitycheck. + +AC_DEFUN([gl_MBRTOWC_SANITYCHECK], +[ + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([gt_LOCALE_ZH_CN]) + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + AC_CACHE_CHECK([whether mbrtowc works as well as mbtowc], + [gl_cv_func_mbrtowc_sanitycheck], + [ + dnl Initial guess, used when cross-compiling or when no suitable locale + dnl is present. +changequote(,)dnl + case "$host_os" in + # Guess no on Solaris 8. + solaris2.8) gl_cv_func_mbrtowc_sanitycheck="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_mbrtowc_sanitycheck="guessing yes" ;; + esac +changequote([,])dnl + if test $LOCALE_ZH_CN != none; then + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include +#include +#include +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +int main () +{ + /* This fails on Solaris 8: + mbrtowc returns 2, and sets wc to 0x00F0. + mbtowc returns 4 (correct) and sets wc to 0x5EDC. */ + if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL) + { + char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */ + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, input + 3, 6, &state) != 4 + && mbtowc (&wc, input + 3, 6) == 4) + return 1; + } + return 0; +}]])], + [gl_cv_func_mbrtowc_sanitycheck=yes], + [gl_cv_func_mbrtowc_sanitycheck=no], + [:]) + fi + ]) +]) + +dnl Test whether mbrtowc supports a NULL pwc argument correctly. +dnl Result is gl_cv_func_mbrtowc_null_arg1. + +AC_DEFUN([gl_MBRTOWC_NULL_ARG1], +[ + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([gt_LOCALE_FR_UTF8]) + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + AC_CACHE_CHECK([whether mbrtowc handles a NULL pwc argument], + [gl_cv_func_mbrtowc_null_arg1], + [ + dnl Initial guess, used when cross-compiling or when no suitable locale + dnl is present. +changequote(,)dnl + case "$host_os" in + # Guess no on Solaris. + solaris*) gl_cv_func_mbrtowc_null_arg1="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_mbrtowc_null_arg1="guessing yes" ;; + esac +changequote([,])dnl + if test $LOCALE_FR_UTF8 != none; then + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include +#include +#include +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +int main () +{ + int result = 0; + + if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL) + { + char input[] = "\303\237er"; + mbstate_t state; + wchar_t wc; + size_t ret; + + memset (&state, '\0', sizeof (mbstate_t)); + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, input, 5, &state); + if (ret != 2) + result |= 1; + if (!mbsinit (&state)) + result |= 2; + + memset (&state, '\0', sizeof (mbstate_t)); + ret = mbrtowc (NULL, input, 5, &state); + if (ret != 2) /* Solaris 7 fails here: ret is -1. */ + result |= 4; + if (!mbsinit (&state)) + result |= 8; + } + return result; +}]])], + [gl_cv_func_mbrtowc_null_arg1=yes], + [gl_cv_func_mbrtowc_null_arg1=no], + [:]) + fi + ]) +]) + +dnl Test whether mbrtowc supports a NULL string argument correctly. +dnl Result is gl_cv_func_mbrtowc_null_arg2. + +AC_DEFUN([gl_MBRTOWC_NULL_ARG2], +[ + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([gt_LOCALE_FR_UTF8]) + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + AC_CACHE_CHECK([whether mbrtowc handles a NULL string argument], + [gl_cv_func_mbrtowc_null_arg2], + [ + dnl Initial guess, used when cross-compiling or when no suitable locale + dnl is present. +changequote(,)dnl + case "$host_os" in + # Guess no on OSF/1. + osf*) gl_cv_func_mbrtowc_null_arg2="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_mbrtowc_null_arg2="guessing yes" ;; + esac +changequote([,])dnl + if test $LOCALE_FR_UTF8 != none; then + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include +#include +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +int main () +{ + if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL) + { + mbstate_t state; + wchar_t wc; + int ret; + + memset (&state, '\0', sizeof (mbstate_t)); + wc = (wchar_t) 0xBADFACE; + mbrtowc (&wc, NULL, 5, &state); + /* Check that wc was not modified. */ + if (wc != (wchar_t) 0xBADFACE) + return 1; + } + return 0; +}]])], + [gl_cv_func_mbrtowc_null_arg2=yes], + [gl_cv_func_mbrtowc_null_arg2=no], + [:]) + fi + ]) +]) + +dnl Test whether mbrtowc, when parsing the end of a multibyte character, +dnl correctly returns the number of bytes that were needed to complete the +dnl character (not the total number of bytes of the multibyte character). +dnl Result is gl_cv_func_mbrtowc_retval. + +AC_DEFUN([gl_MBRTOWC_RETVAL], +[ + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([gt_LOCALE_FR_UTF8]) + AC_REQUIRE([gt_LOCALE_JA]) + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_CACHE_CHECK([whether mbrtowc has a correct return value], + [gl_cv_func_mbrtowc_retval], + [ + dnl Initial guess, used when cross-compiling or when no suitable locale + dnl is present. +changequote(,)dnl + case "$host_os" in + # Guess no on HP-UX, Solaris, native Windows. + hpux* | solaris* | mingw*) gl_cv_func_mbrtowc_retval="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_mbrtowc_retval="guessing yes" ;; + esac +changequote([,])dnl + if test $LOCALE_FR_UTF8 != none || test $LOCALE_JA != none \ + || { case "$host_os" in mingw*) true;; *) false;; esac; }; then + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include +#include +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +int main () +{ + int result = 0; + int found_some_locale = 0; + /* This fails on Solaris. */ + if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL) + { + char input[] = "B\303\274\303\237er"; /* "Büßer" */ + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, input + 1, 1, &state) == (size_t)(-2)) + { + input[1] = '\0'; + if (mbrtowc (&wc, input + 2, 5, &state) != 1) + result |= 1; + } + found_some_locale = 1; + } + /* This fails on HP-UX 11.11. */ + if (setlocale (LC_ALL, "$LOCALE_JA") != NULL) + { + char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */ + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, input + 1, 1, &state) == (size_t)(-2)) + { + input[1] = '\0'; + if (mbrtowc (&wc, input + 2, 5, &state) != 2) + result |= 2; + } + found_some_locale = 1; + } + /* This fails on native Windows. */ + if (setlocale (LC_ALL, "Japanese_Japan.932") != NULL) + { + char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */ + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, input + 3, 1, &state) == (size_t)(-2)) + { + input[3] = '\0'; + if (mbrtowc (&wc, input + 4, 4, &state) != 1) + result |= 4; + } + found_some_locale = 1; + } + if (setlocale (LC_ALL, "Chinese_Taiwan.950") != NULL) + { + char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */ + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, input + 3, 1, &state) == (size_t)(-2)) + { + input[3] = '\0'; + if (mbrtowc (&wc, input + 4, 4, &state) != 1) + result |= 8; + } + found_some_locale = 1; + } + if (setlocale (LC_ALL, "Chinese_China.936") != NULL) + { + char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */ + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, input + 3, 1, &state) == (size_t)(-2)) + { + input[3] = '\0'; + if (mbrtowc (&wc, input + 4, 4, &state) != 1) + result |= 16; + } + found_some_locale = 1; + } + return (found_some_locale ? result : 77); +}]])], + [gl_cv_func_mbrtowc_retval=yes], + [if test $? != 77; then + gl_cv_func_mbrtowc_retval=no + fi + ], + [:]) + fi + ]) +]) + +dnl Test whether mbrtowc, when parsing a NUL character, correctly returns 0. +dnl Result is gl_cv_func_mbrtowc_nul_retval. + +AC_DEFUN([gl_MBRTOWC_NUL_RETVAL], +[ + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([gt_LOCALE_ZH_CN]) + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + AC_CACHE_CHECK([whether mbrtowc returns 0 when parsing a NUL character], + [gl_cv_func_mbrtowc_nul_retval], + [ + dnl Initial guess, used when cross-compiling or when no suitable locale + dnl is present. +changequote(,)dnl + case "$host_os" in + # Guess no on Solaris 8 and 9. + solaris2.[89]) gl_cv_func_mbrtowc_nul_retval="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_mbrtowc_nul_retval="guessing yes" ;; + esac +changequote([,])dnl + if test $LOCALE_ZH_CN != none; then + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include +#include +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +int main () +{ + /* This fails on Solaris 8 and 9. */ + if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL) + { + mbstate_t state; + wchar_t wc; + + memset (&state, '\0', sizeof (mbstate_t)); + if (mbrtowc (&wc, "", 1, &state) != 0) + return 1; + } + return 0; +}]])], + [gl_cv_func_mbrtowc_nul_retval=yes], + [gl_cv_func_mbrtowc_nul_retval=no], + [:]) + fi + ]) +]) + +# Prerequisites of lib/mbrtowc.c. +AC_DEFUN([gl_PREREQ_MBRTOWC], [ + : +]) + + +dnl From Paul Eggert + +dnl This is an override of an autoconf macro. + +AC_DEFUN([AC_FUNC_MBRTOWC], +[ + dnl Same as AC_FUNC_MBRTOWC in autoconf-2.60. + AC_CACHE_CHECK([whether mbrtowc and mbstate_t are properly declared], + gl_cv_func_mbrtowc, + [AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[/* Tru64 with Desktop Toolkit C has a bug: must be + included before . + BSD/OS 4.0.1 has a bug: , and + must be included before . */ + #include + #include + #include + #include ]], + [[wchar_t wc; + char const s[] = ""; + size_t n = 1; + mbstate_t state; + return ! (sizeof state && (mbrtowc) (&wc, s, n, &state));]])], + gl_cv_func_mbrtowc=yes, + gl_cv_func_mbrtowc=no)]) + if test $gl_cv_func_mbrtowc = yes; then + AC_DEFINE([HAVE_MBRTOWC], [1], + [Define to 1 if mbrtowc and mbstate_t are properly declared.]) + fi +]) diff --git a/m4/mbsinit.m4 b/m4/mbsinit.m4 new file mode 100644 index 000000000..2e6d0921a --- /dev/null +++ b/m4/mbsinit.m4 @@ -0,0 +1,51 @@ +# mbsinit.m4 serial 8 +dnl Copyright (C) 2008, 2010-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_MBSINIT], +[ + AC_REQUIRE([gl_WCHAR_H_DEFAULTS]) + AC_REQUIRE([AC_CANONICAL_HOST]) + + AC_REQUIRE([AC_TYPE_MBSTATE_T]) + gl_MBSTATE_T_BROKEN + + AC_CHECK_FUNCS_ONCE([mbsinit]) + if test $ac_cv_func_mbsinit = no; then + HAVE_MBSINIT=0 + AC_CHECK_DECLS([mbsinit],,, [[ +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include +]]) + if test $ac_cv_have_decl_mbsinit = yes; then + dnl On Minix 3.1.8, the system's declares mbsinit() although + dnl it does not have the function. Avoid a collision with gnulib's + dnl replacement. + REPLACE_MBSINIT=1 + fi + else + if test $REPLACE_MBSTATE_T = 1; then + REPLACE_MBSINIT=1 + else + dnl On mingw, mbsinit() always returns 1, which is inappropriate for + dnl states produced by mbrtowc() for an incomplete multibyte character + dnl in multibyte locales. + case "$host_os" in + mingw*) REPLACE_MBSINIT=1 ;; + esac + fi + fi +]) + +# Prerequisites of lib/mbsinit.c. +AC_DEFUN([gl_PREREQ_MBSINIT], [ + : +]) diff --git a/m4/mbstate_t.m4 b/m4/mbstate_t.m4 new file mode 100644 index 000000000..ed0011798 --- /dev/null +++ b/m4/mbstate_t.m4 @@ -0,0 +1,41 @@ +# mbstate_t.m4 serial 13 +dnl Copyright (C) 2000-2002, 2008-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +# From Paul Eggert. + +# BeOS 5 has but does not define mbstate_t, +# so you can't declare an object of that type. +# Check for this incompatibility with Standard C. + +# AC_TYPE_MBSTATE_T +# ----------------- +AC_DEFUN([AC_TYPE_MBSTATE_T], +[ + AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) dnl for HP-UX 11.11 + + AC_CACHE_CHECK([for mbstate_t], [ac_cv_type_mbstate_t], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [AC_INCLUDES_DEFAULT[ +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#include +#include +#include +#include ]], + [[mbstate_t x; return sizeof x;]])], + [ac_cv_type_mbstate_t=yes], + [ac_cv_type_mbstate_t=no])]) + if test $ac_cv_type_mbstate_t = yes; then + AC_DEFINE([HAVE_MBSTATE_T], [1], + [Define to 1 if declares mbstate_t.]) + else + AC_DEFINE([mbstate_t], [int], + [Define to a type if does not define.]) + fi +]) diff --git a/m4/quote.m4 b/m4/quote.m4 new file mode 100644 index 000000000..ac49236e1 --- /dev/null +++ b/m4/quote.m4 @@ -0,0 +1,13 @@ +# quote.m4 serial 6 +dnl Copyright (C) 2002-2003, 2005-2006, 2009-2013 Free Software Foundation, +dnl Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_QUOTE], +[ + dnl Prerequisites of lib/quote.c. + dnl (none) + : +]) diff --git a/m4/quotearg.m4 b/m4/quotearg.m4 new file mode 100644 index 000000000..bc0ef0c93 --- /dev/null +++ b/m4/quotearg.m4 @@ -0,0 +1,10 @@ +# quotearg.m4 serial 9 +dnl Copyright (C) 2002, 2004-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_QUOTEARG], +[ + : +]) diff --git a/m4/wctype_h.m4 b/m4/wctype_h.m4 new file mode 100644 index 000000000..82ada0eee --- /dev/null +++ b/m4/wctype_h.m4 @@ -0,0 +1,209 @@ +# wctype_h.m4 serial 18 + +dnl A placeholder for ISO C99 , for platforms that lack it. + +dnl Copyright (C) 2006-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl Written by Paul Eggert. + +AC_DEFUN([gl_WCTYPE_H], +[ + AC_REQUIRE([gl_WCTYPE_H_DEFAULTS]) + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_CHECK_FUNCS_ONCE([iswcntrl]) + if test $ac_cv_func_iswcntrl = yes; then + HAVE_ISWCNTRL=1 + else + HAVE_ISWCNTRL=0 + fi + AC_SUBST([HAVE_ISWCNTRL]) + + AC_REQUIRE([gt_TYPE_WINT_T]) + if test $gt_cv_c_wint_t = yes; then + HAVE_WINT_T=1 + else + HAVE_WINT_T=0 + fi + AC_SUBST([HAVE_WINT_T]) + + gl_CHECK_NEXT_HEADERS([wctype.h]) + if test $ac_cv_header_wctype_h = yes; then + if test $ac_cv_func_iswcntrl = yes; then + dnl Linux libc5 has an iswprint function that returns 0 for all arguments. + dnl The other functions are likely broken in the same way. + AC_CACHE_CHECK([whether iswcntrl works], [gl_cv_func_iswcntrl_works], + [ + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ + /* Tru64 with Desktop Toolkit C has a bug: must be + included before . + BSD/OS 4.0.1 has a bug: , and + must be included before . */ + #include + #include + #include + #include + #include + int main () { return iswprint ('x') == 0; } + ]])], + [gl_cv_func_iswcntrl_works=yes], [gl_cv_func_iswcntrl_works=no], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include + #if __GNU_LIBRARY__ == 1 + Linux libc5 i18n is broken. + #endif]], [])], + [gl_cv_func_iswcntrl_works="guessing yes"], + [gl_cv_func_iswcntrl_works="guessing no"]) + ]) + ]) + fi + HAVE_WCTYPE_H=1 + else + HAVE_WCTYPE_H=0 + fi + AC_SUBST([HAVE_WCTYPE_H]) + + case "$gl_cv_func_iswcntrl_works" in + *yes) REPLACE_ISWCNTRL=0 ;; + *) REPLACE_ISWCNTRL=1 ;; + esac + AC_SUBST([REPLACE_ISWCNTRL]) + + if test $HAVE_ISWCNTRL = 0 || test $REPLACE_ISWCNTRL = 1; then + dnl Redefine all of iswcntrl, ..., iswxdigit in . + : + fi + + if test $REPLACE_ISWCNTRL = 1; then + REPLACE_TOWLOWER=1 + else + AC_CHECK_FUNCS([towlower]) + if test $ac_cv_func_towlower = yes; then + REPLACE_TOWLOWER=0 + else + AC_CHECK_DECLS([towlower],,, + [[/* Tru64 with Desktop Toolkit C has a bug: must be + included before . + BSD/OS 4.0.1 has a bug: , and + must be included before . */ + #include + #include + #include + #include + #if HAVE_WCTYPE_H + # include + #endif + ]]) + if test $ac_cv_have_decl_towlower = yes; then + dnl On Minix 3.1.8, the system's declares towlower() and + dnl towupper() although it does not have the functions. Avoid a + dnl collision with gnulib's replacement. + REPLACE_TOWLOWER=1 + else + REPLACE_TOWLOWER=0 + fi + fi + fi + AC_SUBST([REPLACE_TOWLOWER]) + + if test $HAVE_ISWCNTRL = 0 || test $REPLACE_TOWLOWER = 1; then + dnl Redefine towlower, towupper in . + : + fi + + dnl We assume that the wctype() and iswctype() functions exist if and only + dnl if the type wctype_t is defined in or in if that + dnl exists. + dnl HP-UX 11.00 declares all these in and lacks . + AC_CACHE_CHECK([for wctype_t], [gl_cv_type_wctype_t], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[/* Tru64 with Desktop Toolkit C has a bug: must be + included before . + BSD/OS 4.0.1 has a bug: , and + must be included before . */ + #include + #include + #include + #include + #if HAVE_WCTYPE_H + # include + #endif + wctype_t a; + ]], + [[]])], + [gl_cv_type_wctype_t=yes], + [gl_cv_type_wctype_t=no]) + ]) + if test $gl_cv_type_wctype_t = no; then + HAVE_WCTYPE_T=0 + fi + + dnl We assume that the wctrans() and towctrans() functions exist if and only + dnl if the type wctrans_t is defined in . + AC_CACHE_CHECK([for wctrans_t], [gl_cv_type_wctrans_t], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[/* Tru64 with Desktop Toolkit C has a bug: must be + included before . + BSD/OS 4.0.1 has a bug: , and + must be included before . */ + #include + #include + #include + #include + #include + wctrans_t a; + ]], + [[]])], + [gl_cv_type_wctrans_t=yes], + [gl_cv_type_wctrans_t=no]) + ]) + if test $gl_cv_type_wctrans_t = no; then + HAVE_WCTRANS_T=0 + fi + + dnl Check for declarations of anything we want to poison if the + dnl corresponding gnulib module is not in use. + gl_WARN_ON_USE_PREPARE([[ +/* Tru64 with Desktop Toolkit C has a bug: must be included before + . + BSD/OS 4.0.1 has a bug: , and must be + included before . */ +#if !(defined __GLIBC__ && !defined __UCLIBC__) +# include +# include +# include +# include +#endif +#include + ]], + [wctype iswctype wctrans towctrans + ]) +]) + +AC_DEFUN([gl_WCTYPE_MODULE_INDICATOR], +[ + dnl Use AC_REQUIRE here, so that the default settings are expanded once only. + AC_REQUIRE([gl_WCTYPE_H_DEFAULTS]) + gl_MODULE_INDICATOR_SET_VARIABLE([$1]) + dnl Define it also as a C macro, for the benefit of the unit tests. + gl_MODULE_INDICATOR_FOR_TESTS([$1]) +]) + +AC_DEFUN([gl_WCTYPE_H_DEFAULTS], +[ + GNULIB_ISWBLANK=0; AC_SUBST([GNULIB_ISWBLANK]) + GNULIB_WCTYPE=0; AC_SUBST([GNULIB_WCTYPE]) + GNULIB_ISWCTYPE=0; AC_SUBST([GNULIB_ISWCTYPE]) + GNULIB_WCTRANS=0; AC_SUBST([GNULIB_WCTRANS]) + GNULIB_TOWCTRANS=0; AC_SUBST([GNULIB_TOWCTRANS]) + dnl Assume proper GNU behavior unless another module says otherwise. + HAVE_ISWBLANK=1; AC_SUBST([HAVE_ISWBLANK]) + HAVE_WCTYPE_T=1; AC_SUBST([HAVE_WCTYPE_T]) + HAVE_WCTRANS_T=1; AC_SUBST([HAVE_WCTRANS_T]) + REPLACE_ISWBLANK=0; AC_SUBST([REPLACE_ISWBLANK]) +]) diff --git a/m4/xalloc.m4 b/m4/xalloc.m4 new file mode 100644 index 000000000..64ca70141 --- /dev/null +++ b/m4/xalloc.m4 @@ -0,0 +1,7 @@ +# xalloc.m4 serial 18 +dnl Copyright (C) 2002-2006, 2009-2013 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_XALLOC], [:])