module Rdf_utf8: sig
.. end
Handling UTF-8 strings.
val utf8_nb_bytes_of_char : char -> int
Return the number of bytes taken by a UTF-8 character, given its
first byte (as a character).
val utf8_index_of_char : string -> int -> int
utf8_index_of_char str n returns the byte position of the n-th
character of str. n is 0-based.
Raises Not_found if there is no n-th character.
val utf8_char_of_index : string -> int -> int
utf8_char_of_index str n returns the UTF-8 character position
corresponding to the given byte index n.
val utf8_string_length : string -> int
utf8_string_length str
returns the number of utf8
characters in str
.
val utf8_substr : string -> int -> int -> string
utf8_substr str pos l returns the substring of str from UTF-8
position pos to pos+l-1.
val utf8_is_prefix : string -> string -> bool
utf8_is_prefix s1 s2 returns whether s2 is a prefix of s1.
val utf8_is_suffix : string -> string -> bool
utf8_is_suffix s1 s2 returns whether s2 is a suffix of s1.
val utf8_substr_pos : string -> string -> int option
utf8_substr_pos s1 s2 returns Some n if s2 is found at position n
in s1, searching from the beginning of s1. Otherwise, returns None.
val utf8_contains : string -> string -> bool
utf8_contains s1 s2
returns whether s1
contains s2
.
val utf8_strbefore : string -> string -> string
utf8_strbefore s1 s2
returns the substring before
s2
in
s1
.
See
details.
val utf8_strafter : string -> string -> string
utf8_strafter s1 s2
returns the substring after
s2
in
s1
.
See
details.
val utf8_char_of_code : int -> string
utf8_char_of_code n
returns the UTF-8 character for the given code point n.
val utf8_string_get_bol : string -> (int * int) list
utf8_string_get_bol str
returns the list of pairs
(line number, char position)
giving the position of beginning of each line
in the given string.
val utf8_count_nl : string -> int
utf8_count_nl str
returns the number of newline \n characters found
in the given UTF-8 string.
val utf8_escape : string -> string
utf8_escape str
escapes some characters by \n, \r, \b, \t, \quotes
and \\ but does not escape \u.