[tin-dev] [tin 2.4.3] snapshots - please test
Urs Janßen
urs at tin.org
Tue Jul 10 09:25:42 CEST 2018
In <mailman.173.1530633342.592.tin-dev at tin.org>, Corinna Vinschen wrote:
>> | o add check for ICU unorm2.h/unorm2_normalize(); prefer it over
>> | ICU unorm.h/unorm_normalize()
>>
>> volunteers? ,-)
>
> Hmm, the call has changed significantly. Not that easy...
untested (I omitted the configure diff, so you either need to
run autoconf (special version needed
[<ftp://ftp.invisible-island.net/autoconf/autoconf-2.13-20110430.tgz>])
once or hard-define HAVE_UNICODE_UNORM2_H for the time being):
=== modified file 'configure.in'
--- configure.in 2018-07-02 12:19:05 +0000
+++ configure.in 2018-07-09 14:27:02 +0000
@@ -962,6 +962,10 @@
])
])
+AC_CHECK_HEADER(unicode/unorm2.h, [
+ AC_DEFINE(HAVE_UNICODE_UNORM2_H)
+])
+
# without icuuc try GNU libunistring for normalization
if test $cf_try_icuuc = no ; then
AC_CHECK_HEADER(unitypes.h,[
=== modified file 'include/autoconf.hin'
--- include/autoconf.hin 2018-07-02 12:19:05 +0000
+++ include/autoconf.hin 2018-07-09 13:59:12 +0000
@@ -683,6 +683,7 @@
* used for unicode normalization
*/
# undef HAVE_UNICODE_UNORM_H
+# undef HAVE_UNICODE_UNORM2_H
# undef HAVE_UNICODE_USTRING_H
# undef HAVE_UNICODE_UBIDI_H
# undef HAVE_UNICODE_UIDNA_H
=== modified file 'src/string.c'
--- src/string.c 2018-07-03 15:52:14 +0000
+++ src/string.c 2018-07-10 07:12:04 +0000
@@ -1202,35 +1202,72 @@
int32_t needed, norm_len;
UChar *ustr, *norm;
UErrorCode status = U_ZERO_ERROR;
+
+#ifdef HAVE_UNICODE_UNORM2_H
+ static const char *uname[] = {"nfc", "nfkc", "nfkc"};
+ const char *unamep;
+ UNormalization2Mode mode;
+#else
UNormalizationMode mode;
+#endif /* !HAVE_UNICODE_UNORM2_H */
+
+ /* convert to UTF-16 which is used internally by ICU */
+ if ((ustr = char2UChar(tmp)) == NULL) /* something went wrong, return the original string (as valid UTF8) */
+ return tmp;
switch (tinrc.normalization_form) {
case NORMALIZE_NFD:
+#ifdef HAVE_UNICODE_UNORM2_H
+ unamep = uname[0];
+ mode = UNORM2_DECOMPOSE;
+#else
mode = UNORM_NFD;
+#endif /* HAVE_UNICODE_UNORM2_H */
break;
case NORMALIZE_NFC:
+#ifdef HAVE_UNICODE_UNORM2_H
+ unamep = uname[0];
+ mode = UNORM2_COMPOSE;
+#else
mode = UNORM_NFC;
+#endif /* HAVE_UNICODE_UNORM2_H */
break;
case NORMALIZE_NFKD:
+#ifdef HAVE_UNICODE_UNORM2_H
+ unamep = uname[1];
+ mode = UNORM2_DECOMPOSE;
+#else
mode = UNORM_NFKD;
+#endif /* HAVE_UNICODE_UNORM2_H */
break;
case NORMALIZE_NFKC:
default:
+#ifdef HAVE_UNICODE_UNORM2_H
+ unamep = uname[1];
+ mode = UNORM2_COMPOSE;
+#else
mode = UNORM_NFKC;
+#endif /* HAVE_UNICODE_UNORM2_H */
}
- /* convert to UTF-16 which is used internally by ICU */
- if ((ustr = char2UChar(tmp)) == NULL) /* something went wrong, return the original string (as valid UTF8) */
- return tmp;
-
+#ifdef HAVE_UNICODE_UNORM2_H
+ needed = unorm2_normalize(unorm2_getInstance(NULL, unamep, mode, &status), ustr, -1, NULL, 0, &status);
+#else
needed = unorm_normalize(ustr, -1, mode, 0, NULL, 0, &status);
+#endif /* HAVE_UNICODE_UNORM2_H */
+
status = U_ZERO_ERROR; /* reset status */
norm_len = needed + 1;
norm = my_malloc(sizeof(UChar) * norm_len);
+#ifdef HAVE_UNICODE_UNORM2_H
+ (void) unorm2_normalize(unorm2_getInstance(NULL, unamep, mode, &status), ustr, -1, norm, norm_len, &status);
+#else
(void) unorm_normalize(ustr, -1, mode, 0, norm, norm_len, &status);
+#endif /* HAVE_UNICODE_UNORM2_H */
+
if (U_FAILURE(status)) {
/* something went wrong, return the original string (as valid UTF8) */
free(ustr);
More information about the tin-dev
mailing list