ICU 68.2
68.2
src
icu
icu4c
source
common
unicode
utf_old.h
Go to the documentation of this file.
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
* Copyright (C) 2002-2012, International Business Machines
7
* Corporation and others. All Rights Reserved.
8
*
9
*******************************************************************************
10
* file name: utf_old.h
11
* encoding: UTF-8
12
* tab size: 8 (not used)
13
* indentation:4
14
*
15
* created on: 2002sep21
16
* created by: Markus W. Scherer
17
*/
18
142
#ifndef __UTF_OLD_H__
143
#define __UTF_OLD_H__
144
145
#include "unicode/utf.h"
146
#include "unicode/utf8.h"
147
#include "unicode/utf16.h"
148
160
#ifndef U_HIDE_OBSOLETE_UTF_OLD_H
161
# define U_HIDE_OBSOLETE_UTF_OLD_H 0
162
#endif
163
164
#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H
165
166
/* Formerly utf.h, part 1 --------------------------------------------------- */
167
168
#ifdef U_USE_UTF_DEPRECATES
169
176
typedef
int32_t UTextOffset;
177
#endif
178
180
#define UTF_SIZE 16
181
188
#define UTF_SAFE
189
190
#undef UTF_UNSAFE
191
192
#undef UTF_STRICT
193
208
#define UTF8_ERROR_VALUE_1 0x15
209
215
#define UTF8_ERROR_VALUE_2 0x9f
216
223
#define UTF_ERROR_VALUE 0xffff
224
231
/**
 * Tests whether c is one of the error sentinels used by the macros in this
 * file: a value whose low 16 bits are 0xfffe or 0xffff (this includes
 * UTF_ERROR_VALUE), UTF8_ERROR_VALUE_1, or UTF8_ERROR_VALUE_2.
 * The argument c is evaluated more than once.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
233
239
/**
 * Tests whether c is a Unicode character according to UTF_IS_UNICODE_CHAR()
 * and is neither of the two UTF-8 error sentinels (UTF8_ERROR_VALUE_1,
 * UTF8_ERROR_VALUE_2).
 * The argument c is evaluated more than once.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
242
247
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
248
254
/**
 * Tests whether c is a noncharacter as defined by this header: a value in
 * 0xfdd0..0xfdef, or a value no greater than 0x10ffff whose low 16 bits are
 * 0xfffe or 0xffff.
 * The argument c is evaluated more than once.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)
258
274
/**
 * Tests whether c is a Unicode character: any value below 0xd800, or a value
 * above 0xdfff and no greater than 0x10ffff for which
 * UTF_IS_UNICODE_NONCHAR() is false. Surrogate values (0xd800..0xdfff) and
 * values that exceed 0x10ffff after conversion to uint32_t are rejected.
 * The argument c is evaluated more than once.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))
279
280
/* Formerly utf8.h ---------------------------------------------------------- */
281
293
#ifdef U_UTF8_IMPL
294
// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes.
295
#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
296
U_CFUNC
const
uint8_t
utf8_countTrailBytes
[];
297
#else
298
U_CFUNC
U_IMPORT
const
uint8_t
utf8_countTrailBytes
[];
/* U_IMPORT2? */
/*U_IMPORT*/
299
#endif
300
305
/**
 * Looks up the entry of utf8_countTrailBytes[] for a lead byte.
 * The argument is converted to uint8_t as a whole before indexing, so compound
 * expressions and signed char values select an index in 0..255.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
306
311
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
312
314
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
315
316
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
317
318
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
319
321
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
322
336
/**
 * Number of UTF-8 bytes for code point c.
 * The active (#if 1) variant yields 1 for values up to 0x7f, 2 up to 0x7ff,
 * 4 for 0x10000..0x10ffff, and 3 for everything else, including values that
 * exceed 0x10ffff after conversion to uint32_t.
 * The disabled #else variant additionally reports 5 and 6 for values up to
 * 0x3ffffff and 0x7fffffff respectively.
 * The argument c is evaluated more than once.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif
357
359
#define UTF8_MAX_CHAR_LENGTH 4
360
362
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
363
365
#define UTF8_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
366
int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
367
UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
368
UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
369
} UPRV_BLOCK_MACRO_END
370
372
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
373
int32_t _utf8_get_char_safe_index=(int32_t)(i); \
374
UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
375
UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
376
} UPRV_BLOCK_MACRO_END
377
379
/**
 * Reads the code point starting at s[i] into c and advances i past it.
 * A first byte in 0xc0..0xf4 is treated as a lead byte: its payload bits are
 * kept via UTF8_MASK_LEAD_BYTE() and as many trail bytes as
 * UTF8_COUNT_TRAIL_BYTES() reports (1 to 3 are handled) are folded in, six
 * bits each. Any other first byte is returned unchanged.
 * "Unsafe": no string limit is checked and trail bytes are not validated.
 * s, i and c are evaluated more than once; i and c must be modifiable lvalues.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
397
399
/**
 * Writes code point c as UTF-8 into s starting at index i and advances i by
 * the number of bytes written: 1 byte up to 0x7f, 2 up to 0x7ff, 3 up to
 * 0xffff, otherwise 4.
 * "Unsafe": neither the buffer capacity nor the value of c is checked.
 * s, i and c are evaluated more than once; i must be a modifiable lvalue.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            /* second-to-last byte, shared by the 3- and 4-byte forms */ \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        /* last byte, shared by all multi-byte forms */ \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
417
419
#define UTF8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
420
(i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
421
} UPRV_BLOCK_MACRO_END
422
424
#define UTF8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
425
int32_t __N=(n); \
426
while(__N>0) { \
427
UTF8_FWD_1_UNSAFE(s, i); \
428
--__N; \
429
} \
430
} UPRV_BLOCK_MACRO_END
431
433
#define UTF8_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
434
while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
435
} UPRV_BLOCK_MACRO_END
436
438
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
439
(c)=(s)[(i)++]; \
440
if((c)>=0x80) { \
441
if(UTF8_IS_LEAD(c)) { \
442
(c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
443
} else { \
444
(c)=UTF8_ERROR_VALUE_1; \
445
} \
446
} \
447
} UPRV_BLOCK_MACRO_END
448
450
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
451
if((uint32_t)(c)<=0x7f) { \
452
(s)[(i)++]=(uint8_t)(c); \
453
} else { \
454
(i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
455
} \
456
} UPRV_BLOCK_MACRO_END
457
459
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
460
462
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
463
465
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
466
468
/**
 * Moves i back to the start of the code point that ends just before s[i] and
 * stores that code point in c.
 * If the byte at s[i-1] is not a trail byte it is returned as is. Otherwise
 * the macro walks backwards, collecting six bits from each byte below 0xc0,
 * until it reaches a byte >= 0xc0, whose payload bits are kept via
 * UTF8_MASK_LEAD_BYTE() using the number of trail bytes seen.
 * "Unsafe": the backward scan has no lower bound on i and does not validate
 * the bytes it passes over.
 * s, i and c are evaluated more than once; i and c must be modifiable lvalues.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
489
491
#define UTF8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
492
while(UTF8_IS_TRAIL((s)[--(i)])) {} \
493
} UPRV_BLOCK_MACRO_END
494
496
#define UTF8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
497
int32_t __N=(n); \
498
while(__N>0) { \
499
UTF8_BACK_1_UNSAFE(s, i); \
500
--__N; \
501
} \
502
} UPRV_BLOCK_MACRO_END
503
505
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
506
UTF8_BACK_1_UNSAFE(s, i); \
507
UTF8_FWD_1_UNSAFE(s, i); \
508
} UPRV_BLOCK_MACRO_END
509
511
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
512
(c)=(s)[--(i)]; \
513
if((c)>=0x80) { \
514
if((c)<=0xbf) { \
515
(c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
516
} else { \
517
(c)=UTF8_ERROR_VALUE_1; \
518
} \
519
} \
520
} UPRV_BLOCK_MACRO_END
521
523
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
524
526
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
527
529
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
530
531
/* Formerly utf16.h --------------------------------------------------------- */
532
534
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
535
537
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
538
540
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
541
543
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
544
546
#define UTF16_GET_PAIR_VALUE(first, second) \
547
(((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
548
550
#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
551
553
#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
554
556
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
557
559
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
560
562
/** Yields 1 when UTF_IS_SURROGATE(uchar) is false, and 0 otherwise. */
#define UTF16_IS_SINGLE(uchar) (UTF_IS_SURROGATE(uchar)==0)
563
565
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
566
568
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
569
571
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
572
574
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
575
577
#define UTF16_MAX_CHAR_LENGTH 2
578
580
#define UTF16_ARRAY_SIZE(size) (size)
581
593
#define UTF16_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
594
(c)=(s)[i]; \
595
if(UTF_IS_SURROGATE(c)) { \
596
if(UTF_IS_SURROGATE_FIRST(c)) { \
597
(c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
598
} else { \
599
(c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
600
} \
601
} \
602
} UPRV_BLOCK_MACRO_END
603
605
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
606
(c)=(s)[i]; \
607
if(UTF_IS_SURROGATE(c)) { \
608
uint16_t __c2; \
609
if(UTF_IS_SURROGATE_FIRST(c)) { \
610
if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
611
(c)=UTF16_GET_PAIR_VALUE((c), __c2); \
612
/* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */
\
613
} else if(strict) {\
614
/* unmatched first surrogate */
\
615
(c)=UTF_ERROR_VALUE; \
616
} \
617
} else { \
618
if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
619
(c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
620
/* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */
\
621
} else if(strict) {\
622
/* unmatched second surrogate */
\
623
(c)=UTF_ERROR_VALUE; \
624
} \
625
} \
626
} else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
627
(c)=UTF_ERROR_VALUE; \
628
} \
629
} UPRV_BLOCK_MACRO_END
630
632
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
633
(c)=(s)[(i)++]; \
634
if(UTF_IS_FIRST_SURROGATE(c)) { \
635
(c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
636
} \
637
} UPRV_BLOCK_MACRO_END
638
640
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
641
if((uint32_t)(c)<=0xffff) { \
642
(s)[(i)++]=(uint16_t)(c); \
643
} else { \
644
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
645
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
646
} \
647
} UPRV_BLOCK_MACRO_END
648
650
#define UTF16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
651
if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
652
++(i); \
653
} \
654
} UPRV_BLOCK_MACRO_END
655
657
#define UTF16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
658
int32_t __N=(n); \
659
while(__N>0) { \
660
UTF16_FWD_1_UNSAFE(s, i); \
661
--__N; \
662
} \
663
} UPRV_BLOCK_MACRO_END
664
666
#define UTF16_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
667
if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
668
--(i); \
669
} \
670
} UPRV_BLOCK_MACRO_END
671
673
/**
 * Reads the code point starting at s[i] into c and advances i past it,
 * looking no further than index length-1 for a second surrogate.
 * A first surrogate followed (within length) by a second surrogate is
 * combined with UTF16_GET_PAIR_VALUE() and i advances by two. An unmatched
 * first surrogate is returned as is, or replaced by UTF_ERROR_VALUE when
 * strict is true. Any other unit for which UTF_IS_UNICODE_CHAR() is false is
 * likewise replaced by UTF_ERROR_VALUE only when strict is true.
 * The first unit is read without comparing i to length.
 * s, i, length, c and strict are evaluated more than once; i and c must be
 * modifiable lvalues.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END
690
692
/**
 * Appends code point c to the UTF-16 buffer s at index i and advances i.
 * - c <= 0xffff: written as one unit; i is not compared to length here.
 * - c <= 0x10ffff: written as a surrogate pair if i+1 < length, otherwise a
 *   single UTF_ERROR_VALUE unit is written.
 * - c > 0x10ffff: a single UTF_ERROR_VALUE unit is written.
 * s, i, length and c are evaluated more than once; i must be a modifiable
 * lvalue.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
} UPRV_BLOCK_MACRO_END
706
708
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
709
711
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
712
714
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
715
717
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
718
(c)=(s)[--(i)]; \
719
if(UTF_IS_SECOND_SURROGATE(c)) { \
720
(c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
721
} \
722
} UPRV_BLOCK_MACRO_END
723
725
#define UTF16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
726
if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
727
--(i); \
728
} \
729
} UPRV_BLOCK_MACRO_END
730
732
#define UTF16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
733
int32_t __N=(n); \
734
while(__N>0) { \
735
UTF16_BACK_1_UNSAFE(s, i); \
736
--__N; \
737
} \
738
} UPRV_BLOCK_MACRO_END
739
741
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
742
if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
743
++(i); \
744
} \
745
} UPRV_BLOCK_MACRO_END
746
748
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
749
(c)=(s)[--(i)]; \
750
if(UTF_IS_SECOND_SURROGATE(c)) { \
751
uint16_t __c2; \
752
if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
753
--(i); \
754
(c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
755
/* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */
\
756
} else if(strict) {\
757
/* unmatched second surrogate */
\
758
(c)=UTF_ERROR_VALUE; \
759
} \
760
} else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
761
/* unmatched first surrogate or other non-character */
\
762
(c)=UTF_ERROR_VALUE; \
763
} \
764
} UPRV_BLOCK_MACRO_END
765
767
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
768
770
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
771
773
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
774
775
/* Formerly utf32.h --------------------------------------------------------- */
776
777
/*
778
* Old documentation:
779
*
780
* This file defines macros to deal with UTF-32 code units and code points.
781
* Signatures and semantics are the same as for the similarly named macros
782
* in utf16.h.
783
* utf32.h is included by utf.h after unicode/umachine.h</p>
784
* and some common definitions.
785
* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
786
* Compound statements (curly braces {}) must be used for if-else-while...
787
* bodies and all macro statements should be terminated with semicolon.</p>
788
*/
789
790
/* internal definitions ----------------------------------------------------- */
791
793
/**
 * Internal check used by the UTF32_*_SAFE macros in this file.
 * When strict is false, accepts any c that is <= 0x10ffff after conversion to
 * uint32_t (surrogates and noncharacters included). When strict is true,
 * accepts only values for which UTF_IS_UNICODE_CHAR() holds.
 * The argument c may be evaluated more than once.
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))
797
798
/*
799
* For the semantics of all of these macros, see utf16.h.
800
* The UTF-32 versions are trivial because any code point is
801
* encoded using exactly one code unit.
802
*/
803
804
/* single-code point definitions -------------------------------------------- */
805
806
/* classes of code unit values */
807
809
#define UTF32_IS_SINGLE(uchar) 1
810
811
#define UTF32_IS_LEAD(uchar) 0
812
813
#define UTF32_IS_TRAIL(uchar) 0
814
815
/* number of code units per code point */
816
818
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
819
820
#define UTF32_CHAR_LENGTH(c) 1
821
822
#define UTF32_MAX_CHAR_LENGTH 1
823
824
/* average number of code units compared to UTF-16 */
825
827
#define UTF32_ARRAY_SIZE(size) (size)
828
830
#define UTF32_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
831
(c)=(s)[i]; \
832
} UPRV_BLOCK_MACRO_END
833
835
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
836
(c)=(s)[i]; \
837
if(!UTF32_IS_SAFE(c, strict)) { \
838
(c)=UTF_ERROR_VALUE; \
839
} \
840
} UPRV_BLOCK_MACRO_END
841
842
/* definitions with forward iteration --------------------------------------- */
843
845
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
846
(c)=(s)[(i)++]; \
847
} UPRV_BLOCK_MACRO_END
848
850
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
851
(s)[(i)++]=(c); \
852
} UPRV_BLOCK_MACRO_END
853
855
#define UTF32_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
856
++(i); \
857
} UPRV_BLOCK_MACRO_END
858
860
#define UTF32_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
861
(i)+=(n); \
862
} UPRV_BLOCK_MACRO_END
863
865
#define UTF32_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
866
} UPRV_BLOCK_MACRO_END
867
869
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
870
(c)=(s)[(i)++]; \
871
if(!UTF32_IS_SAFE(c, strict)) { \
872
(c)=UTF_ERROR_VALUE; \
873
} \
874
} UPRV_BLOCK_MACRO_END
875
877
/**
 * Appends code point c to the UTF-32 buffer s at index i and advances i by
 * one. A value above 0x10ffff (after conversion to uint32_t) is replaced by
 * 0xfffd; note that this differs from UTF16_APPEND_CHAR_SAFE in this file,
 * which writes UTF_ERROR_VALUE instead.
 * The length parameter is accepted but not used, so i is not bounds-checked.
 * c is evaluated more than once; i must be a modifiable lvalue.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
} UPRV_BLOCK_MACRO_END
884
886
#define UTF32_FWD_1_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
887
++(i); \
888
} UPRV_BLOCK_MACRO_END
889
891
#define UTF32_FWD_N_SAFE(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
892
if(((i)+=(n))>(length)) { \
893
(i)=(length); \
894
} \
895
} UPRV_BLOCK_MACRO_END
896
898
#define UTF32_SET_CHAR_START_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
899
} UPRV_BLOCK_MACRO_END
900
901
/* definitions with backward iteration -------------------------------------- */
902
904
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
905
(c)=(s)[--(i)]; \
906
} UPRV_BLOCK_MACRO_END
907
909
#define UTF32_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
910
--(i); \
911
} UPRV_BLOCK_MACRO_END
912
914
#define UTF32_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
915
(i)-=(n); \
916
} UPRV_BLOCK_MACRO_END
917
919
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
920
} UPRV_BLOCK_MACRO_END
921
923
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
924
(c)=(s)[--(i)]; \
925
if(!UTF32_IS_SAFE(c, strict)) { \
926
(c)=UTF_ERROR_VALUE; \
927
} \
928
} UPRV_BLOCK_MACRO_END
929
931
#define UTF32_BACK_1_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
932
--(i); \
933
} UPRV_BLOCK_MACRO_END
934
936
#define UTF32_BACK_N_SAFE(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
937
(i)-=(n); \
938
if((i)<(start)) { \
939
(i)=(start); \
940
} \
941
} UPRV_BLOCK_MACRO_END
942
944
/**
 * No-op: the expansion is an empty block, so i is left unchanged.
 * Note the parameter list (s, i, length): unlike UTF8_SET_CHAR_LIMIT_SAFE and
 * UTF16_SET_CHAR_LIMIT_SAFE in this file, there is no start parameter.
 * The signature is kept as is for existing callers.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
} UPRV_BLOCK_MACRO_END
946
947
/* Formerly utf.h, part 2 --------------------------------------------------- */
948
954
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
955
957
#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
958
960
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
961
962
964
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
965
967
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
968
969
971
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
972
974
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
975
976
978
#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
979
981
#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
982
983
985
#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
986
988
#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
989
990
992
#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
993
995
#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
996
997
999
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
1000
1002
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
1003
1004
1006
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
1007
1009
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
1010
1011
1013
#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
1014
1016
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
1017
1018
1020
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
1021
1023
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
1024
1025
/* Define default macros (UTF-16 "safe") ------------------------------------ */
1026
1032
#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
1033
1039
#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
1040
1046
#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
1047
1053
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
1054
1060
#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
1061
1067
#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
1068
1078
#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
1079
1091
#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
1092
1104
#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
1105
1115
#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
1116
1126
#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
1127
1142
#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
1143
1155
#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
1156
1168
#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
1169
1181
#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
1182
1197
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
1198
1199
#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H
1200
1201
#endif
utf.h
C API: Code point macros.
U_IMPORT
#define U_IMPORT
Definition:
platform.h:847
utf8_countTrailBytes
U_CFUNC const U_IMPORT uint8_t utf8_countTrailBytes[]
Definition:
utf_old.h:298
utf16.h
C API: 16-bit Unicode handling macros.
U_CFUNC
#define U_CFUNC
Definition:
umachine.h:84
utf8.h
C API: 8-bit Unicode handling macros.
Generated by
1.8.17