% \iffalse meta-comment % %% File: l3str.dtx % % Copyright (C) 2011-2024 The LaTeX Project % % It may be distributed and/or modified under the conditions of the % LaTeX Project Public License (LPPL), either version 1.3c of this % license or (at your option) any later version. The latest version % of this license is in the file % % https://www.latex-project.org/lppl.txt % % This file is part of the "l3kernel bundle" (The Work in LPPL) % and all files in that bundle must be distributed together. % % ----------------------------------------------------------------------- % % The development version of the bundle can be found at % % https://github.com/latex3/latex3 % % for those people who are interested. % %<*driver> \documentclass[full,kernel]{l3doc} \begin{document} \DocInput{\jobname.dtx} \end{document} % % \fi % % \title{^^A % The \pkg{l3str} module\\ Strings^^A % } % % \author{^^A % The \LaTeX{} Project\thanks % {^^A % E-mail: % \href{mailto:latex-team@latex-project.org} % {latex-team@latex-project.org}^^A % }^^A % } % % \date{Released 2024-03-14} % % \maketitle % % \begin{documentation} % % \TeX{} associates each character with a category code: as such, there is no % concept of a \enquote{string} as commonly understood in many other % programming languages. However, there are places where we wish to manipulate % token lists while in some sense \enquote{ignoring} category codes: this is % done by treating token lists as strings in a \TeX{} sense. % % A \TeX{} string (and thus an \pkg{expl3} string) is a series of characters % which have category code $12$ (\enquote{other}) with the exception of % space characters which have category code $10$ (\enquote{space}). Thus % at a technical level, a \TeX{} string is a token list with the appropriate % category codes. In this documentation, these are simply referred to as % strings. % % String variables are simply specialised token lists, but by convention % should be named with the suffix \texttt{\ldots{}str}. 
Such variables % should contain characters with category code $12$ (other), except % spaces, which have category code $10$ (blank space). All the % functions in this module which accept a token list argument first % convert it to a string using \cs{tl_to_str:n} for internal processing, % and do not treat a token list or the corresponding string % representation differently. % % As a string is a subset of the more general token list, it is sometimes unclear % when one should be used over the other. % Use a string variable for data that isn't primarily intended for typesetting % and for which a level of protection from unwanted expansion is suitable. % This data type simplifies comparison of variables since there are no concerns % about expansion of their contents. % % The functions \cs{cs_to_str:N}, \cs{tl_to_str:n}, \cs{tl_to_str:N} and % \cs{token_to_str:N} (and variants) generate strings from the appropriate % input: these are documented in \pkg{l3basics}, \pkg{l3tl} and \pkg{l3token}, % respectively. % % Most expandable functions in this module come in three flavours: % \begin{itemize} % \item \cs[no-index]{str_\ldots{}:N}, which expect a token list or string % variable as their argument; % \item \cs[no-index]{str_\ldots{}:n}, taking any token list (or string) as an % argument; % \item \cs[no-index]{str_\ldots{}_ignore_spaces:n}, which ignores any space % encountered during the operation: these functions are typically % faster than those which take care of escaping spaces % appropriately. % \end{itemize} % % \section{Creating and initialising string variables} % % \begin{function}[added = 2015-09-18]{\str_new:N, \str_new:c} % \begin{syntax} % \cs{str_new:N} \meta{str~var} % \end{syntax} % Creates a new \meta{str~var} or raises an error if the name is % already taken. The declaration is global. The \meta{str~var} is % initially empty. 
% \end{function} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_const:Nn, \str_const:NV, \str_const:Ne, % \str_const:cn, \str_const:cV, \str_const:ce % } % \begin{syntax} % \cs{str_const:Nn} \meta{str~var} \Arg{token list} % \end{syntax} % Creates a new constant \meta{str~var} or raises an error if the name % is already taken. The value of the \meta{str~var} is set % globally to the \meta{token list}, converted to a string. % \end{function} % % \begin{function}[added = 2015-09-18] % {\str_clear:N, \str_clear:c, \str_gclear:N, \str_gclear:c} % \begin{syntax} % \cs{str_clear:N} \meta{str~var} % \end{syntax} % Clears the content of the \meta{str~var}. % \end{function} % % \begin{function}[added = 2015-09-18] % { % \str_clear_new:N, \str_clear_new:c, % \str_gclear_new:N, \str_gclear_new:c % } % \begin{syntax} % \cs{str_clear_new:N} \meta{str~var} % \end{syntax} % Ensures that the \meta{str~var} exists globally by applying % \cs{str_new:N} if necessary, then applies % \cs[index=str_clear:N]{str_(g)clear:N} to leave % the \meta{str~var} empty. % \end{function} % % \begin{function}[added = 2015-09-18] % { % \str_set_eq:NN, \str_set_eq:cN, \str_set_eq:Nc, \str_set_eq:cc, % \str_gset_eq:NN, \str_gset_eq:cN, \str_gset_eq:Nc, \str_gset_eq:cc % } % \begin{syntax} % \cs{str_set_eq:NN} \meta{str~var_1} \meta{str~var_2} % \end{syntax} % Sets the content of \meta{str~var_1} equal to that of % \meta{str~var_2}. % \end{function} % % \begin{function}[added = 2017-10-08] % { % \str_concat:NNN, \str_concat:ccc, % \str_gconcat:NNN, \str_gconcat:ccc % } % \begin{syntax} % \cs{str_concat:NNN} \meta{str~var_1} \meta{str~var_2} \meta{str~var_3} % \end{syntax} % Concatenates the content of \meta{str~var_2} and \meta{str~var_3} % together and saves the result in \meta{str~var_1}. The \meta{str~var_2} % is placed at the left side of the new string variable. 
% The \meta{str~var_2} and \meta{str~var_3} must indeed be strings, as % this function does not convert their contents to a string. % \end{function} % % \begin{function}[EXP, pTF, added = 2015-09-18] % {\str_if_exist:N, \str_if_exist:c} % \begin{syntax} % \cs{str_if_exist_p:N} \meta{str~var} % \cs{str_if_exist:NTF} \meta{str~var} \Arg{true code} \Arg{false code} % \end{syntax} % Tests whether the \meta{str~var} is currently defined. This does not % check that the \meta{str~var} really is a string. % \end{function} % % \section{Adding data to string variables} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_set:Nn, \str_set:NV, \str_set:Ne, % \str_set:cn, \str_set:cV, \str_set:ce, % \str_gset:Nn, \str_gset:NV, \str_gset:Ne, % \str_gset:cn, \str_gset:cV, \str_gset:ce % } % \begin{syntax} % \cs{str_set:Nn} \meta{str var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and stores the % result in \meta{str var}. % \end{function} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_put_left:Nn, \str_put_left:NV, \str_put_left:Ne, % \str_put_left:cn, \str_put_left:cV, \str_put_left:ce, % \str_gput_left:Nn, \str_gput_left:NV, \str_gput_left:Ne, % \str_gput_left:cn, \str_gput_left:cV, \str_gput_left:ce % } % \begin{syntax} % \cs{str_put_left:Nn} \meta{str var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and prepends the % result to \meta{str var}. The current contents of the \meta{str % var} are not automatically converted to a string. 
% \end{function} % % \begin{function}[added = 2015-09-18, updated = 2018-07-28] % { % \str_put_right:Nn, \str_put_right:NV, \str_put_right:Ne, % \str_put_right:cn, \str_put_right:cV, \str_put_right:ce, % \str_gput_right:Nn, \str_gput_right:NV, \str_gput_right:Ne, % \str_gput_right:cn, \str_gput_right:cV, \str_gput_right:ce % } % \begin{syntax} % \cs{str_put_right:Nn} \meta{str var} \Arg{token list} % \end{syntax} % Converts the \meta{token list} to a \meta{string}, and appends the % result to \meta{str var}. The current contents of the \meta{str % var} are not automatically converted to a string. % \end{function} % % \section{String conditionals} % % \begin{function}[EXP,pTF, added = 2015-09-18, updated = 2022-03-21] % {\str_if_empty:N, \str_if_empty:c, \str_if_empty:n} % \begin{syntax} % \cs{str_if_empty_p:N} \meta{str~var} % \cs{str_if_empty:NTF} \meta{str~var} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{string variable} is entirely empty % (\emph{i.e.}~contains no characters at all). % \end{function} % % \begin{function}[EXP,pTF, added = 2015-09-18] % {\str_if_eq:NN, \str_if_eq:Nc, \str_if_eq:cN, \str_if_eq:cc} % \begin{syntax} % \cs{str_if_eq_p:NN} \meta{str~var_1} \meta{str~var_2} % \cs{str_if_eq:NNTF} \meta{str~var_1} \meta{str~var_2} \Arg{true code} \Arg{false code} % \end{syntax} % Compares the content of two \meta{str variables} and % is logically \texttt{true} if the two contain the same characters % in the same order. See \cs{tl_if_eq:NNTF} to compare tokens % (including their category codes) rather than characters.
% \end{function} % % \begin{function}[EXP,pTF, updated = 2018-06-18] % { % \str_if_eq:nn, \str_if_eq:Vn, \str_if_eq:on, \str_if_eq:no, % \str_if_eq:nV, \str_if_eq:VV, \str_if_eq:vn, \str_if_eq:nv, % \str_if_eq:ee % } % \begin{syntax} % \cs{str_if_eq_p:nn} \Arg{tl_1} \Arg{tl_2} % \cs{str_if_eq:nnTF} \Arg{tl_1} \Arg{tl_2} \Arg{true code} \Arg{false code} % \end{syntax} % Compares the two \meta{token lists} on a character by character % basis (namely after converting them to strings), % and is \texttt{true} if the two \meta{strings} contain the same % characters in the same order. Thus for example % \begin{verbatim} % \str_if_eq_p:no { abc } { \tl_to_str:n { abc } } % \end{verbatim} % is logically \texttt{true}. See \cs{tl_if_eq:nnTF} to compare % tokens (including their category codes) rather than characters. % \end{function} % % \begin{function}[TF, added = 2017-10-08]{\str_if_in:Nn, \str_if_in:cn} % \begin{syntax} % \cs{str_if_in:NnTF} \meta{str~var} \Arg{token list} \Arg{true code} \Arg{false code} % \end{syntax} % Converts the \meta{token list} to a \meta{string} and % tests if that \meta{string} is found in the content of the % \meta{str~var}. % \end{function} % % \begin{function}[TF, added = 2017-10-08]{\str_if_in:nn} % \begin{syntax} % \cs{str_if_in:nnTF} \Arg{tl_1} \Arg{tl_2} \Arg{true code} \Arg{false code} % \end{syntax} % Converts both \meta{token lists} to \meta{strings} and % tests whether \meta{string_2} is found inside \meta{string_1}. 
% \end{function} % % \begin{function}[added = 2013-07-24, updated = 2022-03-21, EXP, noTF] % { % \str_case:nn, \str_case:Vn, \str_case:Nn, \str_case:on, \str_case:en, % \str_case:nV, \str_case:nv % } % \begin{syntax} % \cs{str_case:nnTF} \Arg{test string} \\ % ~~|{| \\ % ~~~~\Arg{string case_1} \Arg{code case_1} \\ % ~~~~\Arg{string case_2} \Arg{code case_2} \\ % ~~~~\ldots \\ % ~~~~\Arg{string case_n} \Arg{code case_n} \\ % ~~|}| \\ % ~~\Arg{true code} % ~~\Arg{false code} % \end{syntax} % Compares the \meta{test string} in turn with each % of the \meta{string case}s (all token lists are converted to strings). % If the two are equal (as described for % \cs{str_if_eq:nnTF}) then the associated \meta{code} is left in the % input stream and other cases are discarded. If any of the % cases are matched, the \meta{true code} is also inserted into the % input stream (after the code for the appropriate case), while if none % match then the \meta{false code} is inserted. The function % \cs{str_case:nn}, which does nothing if there is no match, is also % available. % % This set of functions performs no expansion on each % \meta{string~case} argument, so any variable in there will be % compared as a string. If expansion is needed in the % \meta{string~case}s, then \cs[no-index]{str_case_e:nn(TF)} should % be used instead. % \end{function} % % \begin{function}[added = 2018-06-19, EXP, noTF] % {\str_case_e:nn, \str_case_e:en} % \begin{syntax} % \cs{str_case_e:nnTF} \Arg{test string} \\ % ~~|{| \\ % ~~~~\Arg{string case_1} \Arg{code case_1} \\ % ~~~~\Arg{string case_2} \Arg{code case_2} \\ % ~~~~\ldots \\ % ~~~~\Arg{string case_n} \Arg{code case_n} \\ % ~~|}| \\ % ~~\Arg{true code} % ~~\Arg{false code} % \end{syntax} % Compares the full expansion of the \meta{test string} % in turn with the full expansion of the \meta{string case}s % (all token lists are converted to strings). 
If the two % full expansions are equal (as described for \cs{str_if_eq:eeTF}) then the % associated \meta{code} is left in the input stream % and other cases are discarded. If any of the % cases are matched, the \meta{true code} is also inserted into the % input stream (after the code for the appropriate case), while if none % match then the \meta{false code} is inserted. The function % \cs{str_case_e:nn}, which does nothing if there is no match, is also % available. % In \cs[index=str_case_e:nnTF]{str_case_e:nn(TF)}, the \meta{test string} % is expanded in each comparison, and must always yield the same result: % for example, random numbers must not be used within this string. % \end{function} % % \begin{function}[EXP, pTF, added = 2021-05-17]{\str_compare:nNn, \str_compare:eNe} % \begin{syntax} % \cs{str_compare_p:nNn} \Arg{tl_1} \meta{relation} \Arg{tl_2} % \cs{str_compare:nNnTF} \Arg{tl_1} \meta{relation} \Arg{tl_2} \Arg{true code} \Arg{false code} % \end{syntax} % Compares the two \meta{token lists} on a character by character % basis (namely after converting them to strings) in a lexicographic % order according to the character codes of the characters. The % \meta{relation} can be |<|, |=|, or~|>| and the test is % \texttt{true} under the following conditions: % \begin{itemize} % \item for |<|, if the first string is earlier than the second in lexicographic order; % \item for |=|, if the two strings have exactly the same characters; % \item for |>|, if the first string is later than the second in lexicographic order. % \end{itemize} % Thus for example the following is logically \texttt{true}: % \begin{verbatim} % \str_compare_p:nNn { ab } < { abc } % \end{verbatim} % \begin{texnote} % This is a wrapper around the \TeX{} primitive % \cs[index=pdfstrcmp]{(pdf)strcmp}. It is meant for programming % and not for sorting textual contents, as it simply considers % character codes and not more elaborate considerations of grapheme % clusters, locale, etc. 
% \end{texnote} % \end{function} % % \section{Mapping over strings} % % All mappings are done at the current group level, \emph{i.e.}~any % local assignments made by the \meta{function} or \meta{code} discussed % below remain in effect after the loop. % % \begin{function}[added = 2017-11-14, rEXP] % {\str_map_function:nN, \str_map_function:NN, \str_map_function:cN} % \begin{syntax} % \cs{str_map_function:nN} \Arg{token list} \meta{function} % \cs{str_map_function:NN} \meta{str~var} \meta{function} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then % applies \meta{function} to every \meta{character} in the % \meta{string} including spaces. % \end{function} % % \begin{function}[added = 2017-11-14] % {\str_map_inline:nn, \str_map_inline:Nn, \str_map_inline:cn} % \begin{syntax} % \cs{str_map_inline:nn} \Arg{token list} \Arg{inline function} % \cs{str_map_inline:Nn} \meta{str~var} \Arg{inline function} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then % applies the \meta{inline function} to every \meta{character} in the % \meta{str~var} including spaces. % The \meta{inline function} should consist of code which % receives the \meta{character} as |#1|. % \end{function} % % \begin{function}[rEXP, added = 2021-05-05] % {\str_map_tokens:nn, \str_map_tokens:Nn, \str_map_tokens:cn} % \begin{syntax} % \cs{str_map_tokens:nn} \Arg{token list} \Arg{code} % \cs{str_map_tokens:Nn} \meta{str~var} \Arg{code} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then applies % \meta{code} to every \meta{character} in the \meta{string} including % spaces. The \meta{code} receives each character as a trailing brace % group. This is equivalent to \cs{str_map_function:nN} if the % \meta{code} consists of a single function. 
% \end{function} % % \begin{function}[added = 2017-11-14] % {\str_map_variable:nNn, \str_map_variable:NNn, \str_map_variable:cNn} % \begin{syntax} % \cs{str_map_variable:nNn} \Arg{token list} \meta{variable} \Arg{code} % \cs{str_map_variable:NNn} \meta{str~var} \meta{variable} \Arg{code} % \end{syntax} % Converts the \meta{token list} to a \meta{string} then stores each % \meta{character} in the \meta{string} (including spaces) in turn in % the (string or token list) \meta{variable} and applies the % \meta{code}. The \meta{code} will usually make use of the % \meta{variable}, but this is not enforced. The assignments to the % \meta{variable} are local. Its value after the loop is the last % \meta{character} in the \meta{string}, or its original value if the % \meta{string} is empty. See also \cs{str_map_inline:Nn}. % \end{function} % % \begin{function}[added = 2017-10-08, rEXP]{\str_map_break:} % \begin{syntax} % \cs{str_map_break:} % \end{syntax} % Used to terminate a \cs[no-index]{str_map_\ldots} function before all % characters in the \meta{string} have been processed. This % normally takes place within a conditional statement, for example % \begin{verbatim} % \str_map_inline:Nn \l_my_str % { % \str_if_eq:nnT { #1 } { bingo } { \str_map_break: } % % Do something useful % } % \end{verbatim} % See also \cs{str_map_break:n}. % Use outside of a \cs[no-index]{str_map_\ldots} scenario leads to low % level \TeX{} errors. % \begin{texnote} % When the mapping is broken, additional tokens may be inserted % before continuing with the % code that follows the loop. % This depends on the design of the mapping function. % \end{texnote} % \end{function} % % \begin{function}[added = 2017-10-08, rEXP]{\str_map_break:n} % \begin{syntax} % \cs{str_map_break:n} \Arg{code} % \end{syntax} % Used to terminate a \cs[no-index]{str_map_\ldots} function before all % characters in the \meta{string} have been processed, inserting % the \meta{code} after the mapping has ended. 
This % normally takes place within a conditional statement, for example % \begin{verbatim} % \str_map_inline:Nn \l_my_str % { % \str_if_eq:nnT { #1 } { bingo } % { \str_map_break:n { <code> } }
%         % Do something useful
%       }
%   \end{verbatim}
%   Use outside of a \cs[no-index]{str_map_\ldots} scenario leads to low
%   level \TeX{} errors.
%   \begin{texnote}
%     When the mapping is broken, additional tokens may be inserted
%     before the \meta{code} is
%     inserted into the input stream.
%     This depends on the design of the mapping function.
%   \end{texnote}
% \end{function}
%
% \section{Working with the content of strings}
%
% \begin{function}[EXP, added = 2015-09-18]{\str_use:N, \str_use:c}
%   \begin{syntax}
%     \cs{str_use:N} \meta{str~var}
%   \end{syntax}
%   Recovers the content of a \meta{str~var} and places it
%   directly in the input stream. An error is raised if the variable
%   does not exist or if it is invalid. Note that it is possible to use
%   a \meta{str} directly without an accessor function.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_count:N, \str_count:c, \str_count:n, \str_count_ignore_spaces:n}
%   \begin{syntax}
%     \cs{str_count:n} \Arg{token list}
%   \end{syntax}
%   Leaves in the input stream the number of characters in the string
%   representation of \meta{token list}, as an integer denotation.  The
%   functions differ in their treatment of spaces.  In the case of
%   \cs{str_count:N} and \cs{str_count:n}, all characters including
%   spaces are counted.  The \cs{str_count_ignore_spaces:n} function
%   leaves the number of non-space characters in the input stream.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_count_spaces:N, \str_count_spaces:c, \str_count_spaces:n}
%   \begin{syntax}
%     \cs{str_count_spaces:n} \Arg{token list}
%   \end{syntax}
%   Leaves in the input stream the number of space characters in the
%   string representation of \meta{token list}, as an integer
%   denotation. Of course, this function has no \texttt{_ignore_spaces}
%   variant.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_head:N, \str_head:c, \str_head:n, \str_head_ignore_spaces:n}
%   \begin{syntax}
%     \cs{str_head:n} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} into a \meta{string}.  The first
%   character in the \meta{string} is then left in the input stream,
%   with category code \enquote{other}.  The functions differ if the
%   first character is a space: \cs{str_head:N} and \cs{str_head:n}
%   return a space token with category code~$10$ (blank space), while
%   the \cs{str_head_ignore_spaces:n} function ignores this space
%   character and leaves the first non-space character in the input
%   stream.  If the \meta{string} is empty (or only contains spaces in
%   the case of the \texttt{_ignore_spaces} function), then nothing is
%   left on the input stream.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_tail:N, \str_tail:c, \str_tail:n, \str_tail_ignore_spaces:n}
%   \begin{syntax}
%     \cs{str_tail:n} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, removes the first
%   character, and leaves the remaining characters (if any) in the input
%   stream, with category codes $12$ and $10$ (for spaces).  The
%   functions differ in the case where the first character is a space:
%   \cs{str_tail:N} and \cs{str_tail:n} only trim that space, while
%   \cs{str_tail_ignore_spaces:n} removes the first non-space character
%   and any space before it.  If the \meta{token list} is empty (or
%   blank in the case of the \texttt{_ignore_spaces} variant), then
%   nothing is left on the input stream.
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {\str_item:Nn, \str_item:nn, \str_item_ignore_spaces:nn}
%   \begin{syntax}
%     \cs{str_item:nn} \Arg{token list} \Arg{integer expression}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, and leaves in the
%   input stream the character in position \meta{integer expression} of
%   the \meta{string}, starting at $1$ for the first (left-most)
%   character.  In the case of \cs{str_item:Nn} and \cs{str_item:nn},
%   all characters including spaces are taken into account.  The
%   \cs{str_item_ignore_spaces:nn} function skips spaces when counting
%   characters.  If the \meta{integer expression} is negative,
%   characters are counted from the end of the \meta{string}. Hence,
%   $-1$ is the right-most character, \emph{etc.}
% \end{function}
%
% \begin{function}[EXP, added = 2015-09-18]
%   {
%     \str_range:Nnn, \str_range:cnn, \str_range:nnn,
%     \str_range_ignore_spaces:nnn
%   }
%   \begin{syntax}
%     \cs{str_range:nnn} \Arg{token list} \Arg{start index} \Arg{end index}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string}, and leaves in the
%   input stream the characters from the \meta{start index} to the
%   \meta{end index} inclusive.  Spaces are preserved and counted as items
%   (contrast this with \cs{tl_range:nnn} where spaces are not counted as
%   items and are possibly discarded from the output).
%
%   Here \meta{start index} and \meta{end index} should be integer denotations.
%   For describing in detail the functions' behavior, let $m$ and $n$ be the start
%   and end index respectively. If either is $0$, the result is empty. A positive
%   index means `start counting from the left end', a negative index means
%   `start counting from the right end'. Let $l$ be the count of the token list.
%
%   The \emph{actual start point} is determined as $M=m$ if~$m>0$ and as $M=l+m+1$
%   if~$m<0$. Similarly the \emph{actual end point} is $N=n$ if~$n>0$ and $N=l+n+1$
%   if~$n<0$. If $M>N$, the result is empty. Otherwise it consists of all items from
%   position $M$ to position $N$ inclusive; for the purpose of this rule, we can
%   imagine that the token list extends to infinity on either side, with void items
%   at positions $s$ for $s\le0$ or $s>l$.
%   For instance,
%   \begin{verbatim}
%     \iow_term:e { \str_range:nnn { abcdef } { 2 } { 5 } }
%     \iow_term:e { \str_range:nnn { abcdef } { -4 } { -1 } }
%     \iow_term:e { \str_range:nnn { abcdef } { -2 } { -1 } }
%     \iow_term:e { \str_range:nnn { abcdef } { 0 } { -1 } }
%   \end{verbatim}
%   prints \texttt{bcde}, \texttt{cdef}, \texttt{ef}, and an empty
%   line to the terminal. The \meta{start index} must always be smaller than
%   or equal to the \meta{end index}: if this is not the case then no output
%   is generated. Thus
%   \begin{verbatim}
%     \iow_term:e { \str_range:nnn { abcdef } { 5 } { 2 } }
%     \iow_term:e { \str_range:nnn { abcdef } { -1 } { -4 } }
%   \end{verbatim}
%   both yield empty strings.
% \end{function}
%
% ^^A If this stays in the same {function} environment, we get a really
% ^^A awful page break. Perhaps we should add a way to allow a page break
% ^^A in a function environment...
%   The behavior of \cs{str_range_ignore_spaces:nnn} is similar, but spaces
%   are removed before starting the job. The input
%   \begin{verbatim}
%     \iow_term:e { \str_range:nnn { abcdefg } { 2 } { 5 } }
%     \iow_term:e { \str_range:nnn { abcdefg } { 2 } { -3 } }
%     \iow_term:e { \str_range:nnn { abcdefg } { -6 } { 5 } }
%     \iow_term:e { \str_range:nnn { abcdefg } { -6 } { -3 } }
%
%     \iow_term:e { \str_range:nnn { abc~efg } { 2 } { 5 } }
%     \iow_term:e { \str_range:nnn { abc~efg } { 2 } { -3 } }
%     \iow_term:e { \str_range:nnn { abc~efg } { -6 } { 5 } }
%     \iow_term:e { \str_range:nnn { abc~efg } { -6 } { -3 } }
%
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { 2 } { 5 } }
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { 2 } { -3 } }
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { -6 } { 5 } }
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcdefg } { -6 } { -3 } }
%
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { 2 } { 5 } }
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { 2 } { -3 } }
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { -6 } { 5 } }
%     \iow_term:e { \str_range_ignore_spaces:nnn { abcd~efg } { -6 } { -3 } }
%   \end{verbatim}
%   will print four instances of |bcde|, four instances of |bc e| and eight
%   instances of |bcde|.
% ^^A\end{function}
%
% \section{Modifying string variables}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_replace_once:Nnn,  \str_replace_once:cnn,
%     \str_greplace_once:Nnn, \str_greplace_once:cnn
%   }
%   \begin{syntax}
%     \cs{str_replace_once:Nnn} \meta{str~var} \Arg{old} \Arg{new}
%   \end{syntax}
%   Converts the \meta{old} and \meta{new} token lists to strings, then
%   replaces the first (leftmost) occurrence of \meta{old string} in the
%   \meta{str~var} with \meta{new string}.
% \end{function}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_replace_all:Nnn, \str_replace_all:cnn,
%     \str_greplace_all:Nnn, \str_greplace_all:cnn
%   }
%   \begin{syntax}
%     \cs{str_replace_all:Nnn} \meta{str~var} \Arg{old} \Arg{new}
%   \end{syntax}
%   Converts the \meta{old} and \meta{new} token lists to strings, then
%   replaces all occurrences of \meta{old string} in the
%   \meta{str~var} with \meta{new string}.
%   As this function
%   operates from left to right, the pattern \meta{old string}
%   may remain after the replacement (see \cs{str_remove_all:Nn}
%   for an example).
% \end{function}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_remove_once:Nn,  \str_remove_once:cn,
%     \str_gremove_once:Nn, \str_gremove_once:cn
%   }
%   \begin{syntax}
%     \cs{str_remove_once:Nn} \meta{str~var} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then
%   removes the first (leftmost) occurrence of \meta{string} from the
%   \meta{str~var}.
% \end{function}
%
% \begin{function}[added = 2017-10-08]
%   {
%     \str_remove_all:Nn,  \str_remove_all:cn,
%     \str_gremove_all:Nn, \str_gremove_all:cn
%   }
%   \begin{syntax}
%     \cs{str_remove_all:Nn} \meta{str~var} \Arg{token list}
%   \end{syntax}
%   Converts the \meta{token list} to a \meta{string} then
%   removes all occurrences of \meta{string} from the
%   \meta{str~var}.
%   As this function
%   operates from left to right, the pattern \meta{string}
%   may remain after the removal, for instance,
%   \begin{quote}
%     \cs{str_set:Nn} \cs{l_tmpa_str} |{abbccd}|
%     \cs{str_remove_all:Nn} \cs{l_tmpa_str} |{bc}|
%   \end{quote}
%   results in \cs{l_tmpa_str} containing \texttt{abcd}.
% \end{function}
%
% \section{String manipulation}
%
% \begin{function}[EXP, added = 2019-11-26]
%    {
%      \str_lowercase:n, \str_lowercase:f,
%      \str_uppercase:n, \str_uppercase:f
%   }
%   \begin{syntax}
%     \cs{str_lowercase:n} \Arg{tokens}
%     \cs{str_uppercase:n} \Arg{tokens}
%   \end{syntax}
%   Converts the input \meta{tokens} to their string representation, as
%   described for \cs{tl_to_str:n}, and then to the lower or upper
%   case representation using a one-to-one mapping as described by the
%   Unicode Consortium file |UnicodeData.txt|.
%
%   These functions are intended for case changing programmatic data in
%   places where upper/lower case distinctions are meaningful. One example
%   would be automatically generating a function name from user input where
%   some case changing is needed. In this situation the input is programmatic,
%   not textual, case does have meaning and a language-independent one-to-one
%   mapping is appropriate. For example
%   \begin{verbatim}
%     \cs_new_protected:Npn \myfunc:nn #1#2
%       {
%         \cs_set_protected:cpn
%           {
%             user
%             \str_uppercase:f { \tl_head:n {#1} }
%             \str_lowercase:f { \tl_tail:n {#1} }
%           }
%           { #2 }
%       }
%   \end{verbatim}
%   would be used to generate a function with an auto-generated name consisting
%   of \texttt{user}, the upper case equivalent of the first character of the
%   supplied name, and the lower case equivalent of the rest of the input.
%
%   These functions should \emph{not} be used for
%   \begin{itemize}
%     \item Caseless comparisons: use \cs{str_casefold:n} for this
%       situation (case folding is distinct from lower casing).
%     \item Case changing text for typesetting: see the
%       \cs[index=text_lowercase:n]{text_lowercase:n(n)},
%       \cs[index=text_uppercase:n]{text_uppercase:n(n)} and
%       \cs[index=text_titlecase_all:n]{text_titlecase_(all|once):n(n)} functions which
%       correctly deal with context-dependence and other factors appropriate
%       to text case changing.
%   \end{itemize}
% \end{function}
%
% \begin{function}[EXP, added = 2022-10-16]
%   {\str_casefold:n, \str_casefold:V}
%   \begin{syntax}
%     \cs{str_casefold:n} \Arg{tokens}
%   \end{syntax}
%   Converts the input \meta{tokens} to their string representation, as
%   described for \cs{tl_to_str:n}, and then folds the case of the resulting
%   \meta{string} to remove case information. The result of this process is
%   left in the input stream.
%
%   String folding is a process used for material such as identifiers rather
%   than for \enquote{text}. The folding provided by \cs{str_casefold:n}
%   follows the mappings provided by the \href{http://www.unicode.org}^^A
%   {Unicode Consortium}, who
%   \href{http://www.unicode.org/faq/casemap_charprop.html#2}{state}:
%   \begin{quote}
%     Case folding is primarily used for caseless comparison of text, such
%     as identifiers in a computer program, rather than actual text
%     transformation. Case folding in Unicode is based on the lowercase
%     mapping, but includes additional changes to the source text to help make
%     it language-insensitive and consistent. As a result, case-folded text
%     should be used solely for internal processing and generally should not be
%     stored or displayed to the end user.
%   \end{quote}
%   The folding approach implemented by \cs{str_casefold:n} follows the
%   \enquote{full} scheme defined by the Unicode Consortium
%   (\emph{e.g.}~\SS folds to \texttt{SS}). As case-folding is
%   a language-insensitive process, there is no special treatment of
%   Turkic input (\emph{i.e.}~\texttt{I} always folds to \texttt{i} and
%   not to \texttt{\i}).
% \end{function}
%
% \begin{function}[added = 2023-05-19, EXP]{\str_mdfive_hash:n, \str_mdfive_hash:e}
%   \begin{syntax}
%     \cs{str_mdfive_hash:n} \Arg{tl}
%   \end{syntax}
%   Expands to the MD5 sum generated from the \meta{tl}, which is converted
%   to a \meta{string} as described for \cs{tl_to_str:n}.
% \end{function}
%
% \section{Viewing strings}
%
% \begin{function}[added = 2015-09-18, updated = 2021-04-29]
%   {\str_show:N, \str_show:c, \str_show:n}
%   \begin{syntax}
%     \cs{str_show:N} \meta{str~var}
%   \end{syntax}
%   Displays the content of the \meta{str~var} on the terminal.
% \end{function}
%
% \begin{function}[added = 2019-02-15, updated = 2021-04-29]
%   {\str_log:N, \str_log:c, \str_log:n}
%   \begin{syntax}
%     \cs{str_log:N} \meta{str~var}
%   \end{syntax}
%   Writes the content of the \meta{str~var} in the log file.
% \end{function}
%
% \section{Constant strings}
%
% \begin{variable}[added = 2015-09-19, updated = 2020-12-22, module = str]
%   {
%     \c_ampersand_str,
%     \c_atsign_str,
%     \c_backslash_str,
%     \c_left_brace_str,
%     \c_right_brace_str,
%     \c_circumflex_str,
%     \c_colon_str,
%     \c_dollar_str,
%     \c_hash_str,
%     \c_percent_str,
%     \c_tilde_str,
%     \c_underscore_str,
%     \c_zero_str
%   }
%   Constant strings, containing a single character token, with category
%   code $12$.
% \end{variable}
%
% \begin{variable}[added = 2023-12-07]{\c_empty_str}
%   Constant that is always empty.
% \end{variable}
%
% \section{Scratch strings}
%
% \begin{variable}{\l_tmpa_str, \l_tmpb_str}
%   Scratch strings for local assignment. These are never used by
%   the kernel code, and so are safe for use with any \LaTeX3-defined
%   function. However, they may be overwritten by other non-kernel
%   code and so should only be used for short-term storage.
% \end{variable}
%
% \begin{variable}{\g_tmpa_str, \g_tmpb_str}
%   Scratch strings for global assignment. These are never used by
%   the kernel code, and so are safe for use with any \LaTeX3-defined
%   function. However, they may be overwritten by other non-kernel
%   code and so should only be used for short-term storage.
% \end{variable}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3str} implementation}
%
%    \begin{macrocode}
%<*package>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=str>
%    \end{macrocode}
%
% \subsection{Internal auxiliaries}
%
% \begin{variable}{\s_@@_mark,\s_@@_stop}
%   Internal scan marks.  In the code below \cs{s_@@_stop} closes the
%   delimited arguments of the gobbling auxiliaries and of the item and
%   range code, while \cs{s_@@_mark} separates the converted part of the
%   string in \cs{__kernel_str_to_other:n} and the branch code in the
%   \texttt{case} functions.
%    \begin{macrocode}
\scan_new:N \s_@@_mark
\scan_new:N \s_@@_stop
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{
%     \@@_use_none_delimit_by_s_stop:w,
%     \@@_use_i_delimit_by_s_stop:nw
%   }
%   Functions to gobble up to a scan mark.  The first one discards
%   everything up to and including the next \cs{s_@@_stop}.  The second
%   one grabs one undelimited argument, discards the rest of the input up
%   to and including \cs{s_@@_stop}, and leaves the grabbed argument in
%   the input stream.
%    \begin{macrocode}
\cs_new:Npn \@@_use_none_delimit_by_s_stop:w #1 \s_@@_stop { }
\cs_new:Npn \@@_use_i_delimit_by_s_stop:nw #1 #2 \s_@@_stop {#1}
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{\q_@@_recursion_tail,\q_@@_recursion_stop}
%   Internal recursion quarks.  The mapping functions of this module
%   place \cs{q_@@_recursion_tail} after the string to mark the end of
%   the loop.
%    \begin{macrocode}
\quark_new:N \q_@@_recursion_tail
\quark_new:N \q_@@_recursion_stop
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{
%     \@@_if_recursion_tail_break:NN,
%     \@@_if_recursion_tail_stop_do:Nn
%   }
%   Functions to query recursion quarks, generated by the kernel from
%   their names.  In this module \cs{@@_if_recursion_tail_break:NN} is
%   called by the inline and variable mappings with the current character
%   and \cs{str_map_break:} as its two arguments.
%    \begin{macrocode}
\__kernel_quark_new_test:N \@@_if_recursion_tail_break:NN
\__kernel_quark_new_test:N \@@_if_recursion_tail_stop_do:Nn
%    \end{macrocode}
% \end{macro}
%
% \subsection{Creating and setting string variables}
%
% \begin{macro}
%   {
%     \str_new:N, \str_new:c,
%     \str_use:N, \str_use:c,
%     \str_clear:N, \str_clear:c,
%     \str_gclear:N,\str_gclear:c,
%     \str_clear_new:N, \str_clear_new:c,
%     \str_gclear_new:N, \str_gclear_new:c
%   }
% \begin{macro}
%   {
%     \str_set_eq:NN,  \str_set_eq:cN,  \str_set_eq:Nc,  \str_set_eq:cc,
%     \str_gset_eq:NN, \str_gset_eq:cN, \str_gset_eq:Nc, \str_gset_eq:cc
%   }
% \begin{macro}
%   {\str_concat:NNN, \str_concat:ccc, \str_gconcat:NNN, \str_gconcat:ccc}
%   Since a string is just a token list, these functions are plain copies
%   of their \pkg{l3tl} counterparts.  The mapping machinery is not
%   available at this point, so the one-argument functions are copied by a
%   temporary recursive auxiliary, local to a group, which stops at the
%   first blank argument.  Each copy gets its \texttt{c}-type variant at
%   the same time.
%    \begin{macrocode}
\group_begin:
  \cs_set_protected:Npn \@@_tmp:n #1
    {
      \tl_if_blank:nF {#1}
        {
          \cs_new_eq:cc { str_ #1 :N } { tl_ #1 :N }
          \exp_args:Nc \cs_generate_variant:Nn { str_ #1 :N } { c }
          \@@_tmp:n
        }
    }
  \@@_tmp:n { new } { use } { clear } { gclear }
    { clear_new } { gclear_new } { }
\group_end:
\cs_new_eq:NN \str_set_eq:NN \tl_set_eq:NN
\cs_generate_variant:Nn \str_set_eq:NN { c , Nc , cc }
\cs_new_eq:NN \str_gset_eq:NN \tl_gset_eq:NN
\cs_generate_variant:Nn \str_gset_eq:NN { c , Nc , cc }
\cs_new_eq:NN \str_concat:NNN \tl_concat:NNN
\cs_generate_variant:Nn \str_concat:NNN { ccc }
\cs_new_eq:NN \str_gconcat:NNN \tl_gconcat:NNN
\cs_generate_variant:Nn \str_gconcat:NNN { ccc }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}
%   {
%     \str_set:Nn, \str_set:NV, \str_set:Ne, \str_set:Nx,
%     \str_set:cn, \str_set:cV, \str_set:ce, \str_set:cx,
%     \str_gset:Nn, \str_gset:NV, \str_gset:Ne, \str_gset:Nx,
%     \str_gset:cn, \str_gset:cV, \str_gset:ce, \str_gset:cx,
%     \str_const:Nn, \str_const:NV, \str_const:Ne, \str_const:Nx,
%     \str_const:cn, \str_const:cV, \str_const:ce, \str_const:cx,
%     \str_put_left:Nn, \str_put_left:NV, \str_put_left:Ne, \str_put_left:Nx,
%     \str_put_left:cn, \str_put_left:cV, \str_put_left:ce, \str_put_left:cx,
%     \str_gput_left:Nn, \str_gput_left:NV, \str_gput_left:Ne, \str_gput_left:Nx,
%     \str_gput_left:cn, \str_gput_left:cV, \str_gput_left:ce, \str_gput_left:cx,
%     \str_put_right:Nn, \str_put_right:NV, \str_put_right:Ne, \str_put_right:Nx,
%     \str_put_right:cn, \str_put_right:cV, \str_put_right:ce, \str_put_right:cx,
%     \str_gput_right:Nn, \str_gput_right:NV, \str_gput_right:Ne, \str_gput_right:Nx,
%     \str_gput_right:cn, \str_gput_right:cV, \str_gput_right:ce, \str_gput_right:cx
%   }
%   Similar to corresponding \pkg{l3tl} base functions, except that
%   \cs{__kernel_exp_not:w} is replaced with \cs{__kernel_tl_to_str:w}.
%   In each function |#1| is the string variable and |#2| the token list
%   which is converted to a string as part of the assignment.
%   Just like token list, string constants use \cs{cs_gset_nopar:Npe}
%   instead of \cs{__kernel_tl_gset:Nx} so that the scope checking for
%   |c| is applied when \pkg{l3debug} is used.
%   To maintain backward compatibility, in
%     \cs[index=str_put_left:Nn]{str_(g)put_left:Nn} and
%     \cs[index=str_put_right:Nn]{str_(g)put_right:Nn},
%   contents of string variables are wrapped in \cs{__kernel_exp_not:w}
%   to prevent further expansion.
%   All the base functions are created with \cs{cs_new_protected:Npn}, so
%   that an accidental clash with an existing name is reported.
%    \begin{macrocode}
\cs_new_protected:Npn \str_set:Nn #1#2
  { \__kernel_tl_set:Nx #1 { \__kernel_tl_to_str:w {#2} } }
\cs_new_protected:Npn \str_gset:Nn #1#2
  { \__kernel_tl_gset:Nx #1 { \__kernel_tl_to_str:w {#2} } }
\cs_new_protected:Npn \str_const:Nn #1#2
  {
    \__kernel_chk_if_free_cs:N #1
    \cs_gset_nopar:Npe #1 { \__kernel_tl_to_str:w {#2} }
  }
\cs_new_protected:Npn \str_put_left:Nn #1#2
  {
    \__kernel_tl_set:Nx #1
      { \__kernel_tl_to_str:w {#2} \__kernel_exp_not:w \exp_after:wN {#1} }
  }
\cs_new_protected:Npn \str_gput_left:Nn #1#2
  {
    \__kernel_tl_gset:Nx #1
      { \__kernel_tl_to_str:w {#2} \__kernel_exp_not:w \exp_after:wN {#1} }
  }
\cs_new_protected:Npn \str_put_right:Nn #1#2
  {
    \__kernel_tl_set:Nx #1
      { \__kernel_exp_not:w \exp_after:wN {#1} \__kernel_tl_to_str:w {#2} }
  }
\cs_new_protected:Npn \str_gput_right:Nn #1#2
  {
    \__kernel_tl_gset:Nx #1
      { \__kernel_exp_not:w \exp_after:wN {#1} \__kernel_tl_to_str:w {#2} }
  }
\cs_generate_variant:Nn \str_set:Nn        { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_gset:Nn       { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_const:Nn      { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_put_left:Nn   { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_gput_left:Nn  { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_put_right:Nn  { NV , Ne , Nx , c , cV , ce , cx }
\cs_generate_variant:Nn \str_gput_right:Nn { NV , Ne , Nx , c , cV , ce , cx }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Modifying string variables}
%
% \begin{macro}
%   {
%     \str_replace_all:Nnn,   \str_replace_all:cnn,
%     \str_greplace_all:Nnn,  \str_greplace_all:cnn,
%     \str_replace_once:Nnn,  \str_replace_once:cnn,
%     \str_greplace_once:Nnn, \str_greplace_once:cnn
%   }
% \begin{macro}{\@@_replace:NNNnn}
% \begin{macro}{\@@_replace_aux:NNNnnn}
% \begin{macro}{\@@_replace_next:w}
%   Start by applying \cs{tl_to_str:n} to convert the old and new token
%   lists to strings, and also apply \cs{tl_to_str:N} to avoid any
%   issues if we are fed a token list variable.  Then the code is a much
%   simplified version of the token list code because neither the
%   delimiter nor the replacement can contain macro parameters or
%   braces.  The delimiter \cs{s_@@_mark} cannot appear in the string to
%   edit so it is used in all cases.  Some |e|-expansion is unnecessary.
%   There is no need to avoid losing braces nor to protect against
%   expansion.  The ending code is much simplified and does not need to
%   hide in braces.
%
%   The arguments of \cs{@@_replace:NNNnn} are: |#1|, the function
%   inserted after each replacement (\cs{prg_do_nothing:} to stop after
%   the first match, \cs{@@_replace_next:w} to carry on); |#2|, the
%   assignment function; |#3|, the variable; |#4|, the search pattern;
%   |#5|, the replacement.  An empty pattern raises an error and no
%   assignment is performed.  Otherwise \cs{@@_replace_aux:NNNnnn}
%   receives the same first three arguments followed by the string
%   versions of the variable contents, pattern and replacement.  It
%   defines \cs{@@_replace_next:w} as delimited by the pattern, and
%   appends one extra copy of the pattern after the string so that the
%   delimiter is always found; the \cs{@@_use_none_delimit_by_s_stop:w}
%   placed just before that copy then discards everything up to
%   \cs{s_@@_stop}.
%    \begin{macrocode}
\cs_new_protected:Npn \str_replace_once:Nnn
  { \@@_replace:NNNnn \prg_do_nothing: \__kernel_tl_set:Nx  }
\cs_new_protected:Npn \str_greplace_once:Nnn
  { \@@_replace:NNNnn \prg_do_nothing: \__kernel_tl_gset:Nx }
\cs_new_protected:Npn \str_replace_all:Nnn
  { \@@_replace:NNNnn \@@_replace_next:w \__kernel_tl_set:Nx  }
\cs_new_protected:Npn \str_greplace_all:Nnn
  { \@@_replace:NNNnn \@@_replace_next:w \__kernel_tl_gset:Nx }
\cs_generate_variant:Nn \str_replace_once:Nnn  { c }
\cs_generate_variant:Nn \str_greplace_once:Nnn { c }
\cs_generate_variant:Nn \str_replace_all:Nnn   { c }
\cs_generate_variant:Nn \str_greplace_all:Nnn  { c }
\cs_new_protected:Npn \@@_replace:NNNnn #1#2#3#4#5
  {
    \tl_if_empty:nTF {#4}
      {
        \msg_error:nne { kernel } { empty-search-pattern } {#5}
      }
      {
        \use:e
          {
            \exp_not:n { \@@_replace_aux:NNNnnn #1 #2 #3 }
              { \tl_to_str:N #3 }
              { \tl_to_str:n {#4} } { \tl_to_str:n {#5} }
          }
      }
  }
\cs_new_protected:Npn \@@_replace_aux:NNNnnn #1#2#3#4#5#6
  {
    \cs_set:Npn \@@_replace_next:w ##1 #5 { ##1 #6 #1 }
    #2 #3
      {
        \@@_replace_next:w
        #4
        \@@_use_none_delimit_by_s_stop:w
        #5
        \s_@@_stop
      }
  }
\cs_new_eq:NN \@@_replace_next:w ?
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\str_remove_once:Nn, \str_remove_once:cn}
% \begin{macro}{\str_gremove_once:Nn, \str_gremove_once:cn}
%   Removing the first occurrence of |#2| from the variable |#1| is the
%   same as replacing it by nothing.
%    \begin{macrocode}
\cs_new_protected:Npn \str_remove_once:Nn #1#2
  {
    \str_replace_once:Nnn #1 {#2} { }
  }
\cs_generate_variant:Nn \str_remove_once:Nn { c }
\cs_new_protected:Npn \str_gremove_once:Nn #1#2
  {
    \str_greplace_once:Nnn #1 {#2} { }
  }
\cs_generate_variant:Nn \str_gremove_once:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\str_remove_all:Nn, \str_remove_all:cn}
% \begin{macro}{\str_gremove_all:Nn, \str_gremove_all:cn}
%   Removing every occurrence of |#2| from the variable |#1| is the same
%   as replacing each of them by nothing.
%    \begin{macrocode}
\cs_new_protected:Npn \str_remove_all:Nn #1#2
  {
    \str_replace_all:Nnn #1 {#2} { }
  }
\cs_generate_variant:Nn \str_remove_all:Nn { c }
\cs_new_protected:Npn \str_gremove_all:Nn #1#2
  {
    \str_greplace_all:Nnn #1 {#2} { }
  }
\cs_generate_variant:Nn \str_gremove_all:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{String comparisons}
%
% \begin{macro}[pTF, EXP]
%   {
%     \str_if_empty:N, \str_if_empty:c, \str_if_empty:n,
%     \str_if_exist:N, \str_if_exist:c
%   }
%   These conditionals are direct copies of the token list ones.
%    \begin{macrocode}
\prg_new_eq_conditional:NNn \str_if_empty:N \tl_if_empty:N { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_empty:c \tl_if_empty:c { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_empty:n \tl_if_empty:n { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_exist:N \tl_if_exist:N { p , T , F , TF }
\prg_new_eq_conditional:NNn \str_if_exist:c \tl_if_exist:c { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_if_eq:nn}
%   String comparisons rely on the primitive \tn[index=pdfstrcmp]{(pdf)strcmp},
%   so we define a new name for it.  The conditionals below rely on it
%   expanding to |-1|, |0| or~|1|, with |0| meaning that the two
%   arguments are equal as strings.
%    \begin{macrocode}
\cs_new_eq:NN \@@_if_eq:nn \tex_strcmp:D
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF, EXP]{\str_compare:nNn, \str_compare:eNe}
%   The result of \cs{@@_if_eq:nn} (one of |-1|, |0| or~|1|) is compared
%   to zero using the relation |#2|.  In the |nNn| version both strings
%   are protected from expansion by \cs{exp_not:n}; the |eNe| version is
%   defined directly, without that protection, for efficiency.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_compare:nNn #1#2#3 { p , T , F , TF }
  {
    \if_int_compare:w
        \@@_if_eq:nn { \exp_not:n {#1} } { \exp_not:n {#3} } #2 \c_zero_int
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_new_conditional:Npnn \str_compare:eNe #1#2#3 { p , T , F , TF }
  {
    \if_int_compare:w
        \@@_if_eq:nn {#1} {#3} #2 \c_zero_int
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF, EXP]
%   {
%     \str_if_eq:nn, \str_if_eq:Vn, \str_if_eq:on, \str_if_eq:nV,
%     \str_if_eq:no, \str_if_eq:VV,
%     \str_if_eq:ee
%   }
%   The engine-level comparison of two token lists yields a number, so
%   these conditionals wrap it in a more readable interface.  Both the
%   \texttt{nn} and the \texttt{ee} versions are defined directly, which
%   is the most efficient approach.  When the two lists match,
%   \cs{@@_if_eq:nn} expands to an explicit |0| of category code 12
%   (and to |-1| or |1| otherwise), hence the test can be done with
%   \cs{if:w}, which is faster than \cs{if_int_compare:w}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_if_eq:nn #1#2 { p , T , F , TF }
  {
    \if:w 0 \@@_if_eq:nn { \exp_not:n {#1} } { \exp_not:n {#2} }
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_new_conditional:Npnn \str_if_eq:ee #1#2 { p , T , F , TF }
  {
    \if:w 0 \@@_if_eq:nn {#1} {#2}
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_generate_conditional_variant:Nnn \str_if_eq:nn
  { V , v , o , nV , no , VV , nv }
  { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP, pTF]
%   {\str_if_eq:NN, \str_if_eq:Nc, \str_if_eq:cN, \str_if_eq:cc}
%   Unlike \cs{tl_if_eq:NNTF}, \cs{str_if_eq:NNTF} has to ignore category
%   codes, so both variables are converted with \cs{tl_to_str:N} before
%   being compared.
%    \begin{macrocode}
\prg_new_conditional:Npnn \str_if_eq:NN #1#2 { p , TF , T , F }
  {
    \if:w 0 \@@_if_eq:nn { \tl_to_str:N #1 } { \tl_to_str:N #2 }
      \prg_return_true:
    \else:
      \prg_return_false:
    \fi:
  }
\prg_generate_conditional_variant:Nnn \str_if_eq:NN
  { c , Nc , cc }
  { T , F , TF , p }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[TF]{\str_if_in:Nn, \str_if_in:cn, \str_if_in:nn}
%   Once both arguments have been detokenized this is an ordinary token
%   list test: the \cs{use:e} step performs the conversion before
%   \cs{tl_if_in:nnTF} runs.  Calling the matching variant of
%   \cs{tl_if_in:nnTF} for each of |T|, |F| and |TF| would be faster but
%   would need more code.
%    \begin{macrocode}
\prg_new_protected_conditional:Npnn \str_if_in:Nn #1#2 { T , F , TF }
  {
    \use:e
      {
        \tl_if_in:nnTF
          { \tl_to_str:N #1 }
          { \tl_to_str:n {#2} }
      }
      { \prg_return_true: }
      { \prg_return_false: }
  }
\prg_generate_conditional_variant:Nnn \str_if_in:Nn { c } { T , F , TF }
\prg_new_protected_conditional:Npnn \str_if_in:nn #1#2 { T , F , TF }
  {
    \use:e
      {
        \tl_if_in:nnTF
          { \tl_to_str:n {#1} }
          { \tl_to_str:n {#2} }
      }
      { \prg_return_true: }
      { \prg_return_false: }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP, noTF]
%   {
%     \str_case:nn, \str_case:Vn, \str_case:Nn, \str_case:on, \str_case:en, \str_case:nV, \str_case:nv,
%     \str_case_e:nn, \str_case_e:en
%   }
% \begin{macro}[EXP]{\@@_case:nnTF, \@@_case_e:nnTF}
% \begin{macro}[EXP]
%   {\@@_case:nw, \@@_case_e:nw, \@@_case_end:nw}
%   The aim here is to allow the case statement to be evaluated
%   using a known number of expansion steps (two), and without
%   needing to use an explicit \enquote{end of recursion} marker.
%   That is achieved by using the test input as the final case,
%   as this is always true. The trick is then to tidy up
%   the output such that the appropriate case code plus either
%   the \texttt{true} or \texttt{false} branch code is inserted.
%
%   The four user-level forms differ only in which of the \texttt{true}
%   and \texttt{false} branches they supply as empty before handing over
%   to \cs{@@_case:nnTF} (or \cs{@@_case_e:nnTF}).  The \texttt{N}-type
%   functions are copies of the \texttt{V}-type variants.  The loop
%   \cs{@@_case:nw} compares the test string |#1| with each case |#2|
%   using \cs{str_if_eq:nnTF}, while \cs{@@_case_e:nw} uses
%   \cs{str_if_eq:eeTF} instead.
%    \begin{macrocode}
\cs_new:Npn \str_case:nn #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} { } { }
  }
\cs_new:Npn \str_case:nnT #1#2#3
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} {#3} { }
  }
\cs_new:Npn \str_case:nnF #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2} { }
  }
\cs_new:Npn \str_case:nnTF #1#2
  {
    \exp:w
    \@@_case:nnTF {#1} {#2}
  }
\cs_new:Npn \@@_case:nnTF #1#2#3#4
  { \@@_case:nw {#1} #2 {#1} { } \s_@@_mark {#3} \s_@@_mark {#4} \s_@@_stop }
\cs_generate_variant:Nn \str_case:nn   { V , o , e , nV , nv }
\prg_generate_conditional_variant:Nnn \str_case:nn
  { V , o , e , nV , nv } { T , F , TF }
\cs_new_eq:NN \str_case:Nn   \str_case:Vn
\cs_new_eq:NN \str_case:NnT  \str_case:VnT
\cs_new_eq:NN \str_case:NnF  \str_case:VnF
\cs_new_eq:NN \str_case:NnTF \str_case:VnTF
\cs_new:Npn \@@_case:nw #1#2#3
  {
    \str_if_eq:nnTF {#1} {#2}
      { \@@_case_end:nw {#3} }
      { \@@_case:nw {#1} }
  }
\cs_new:Npn \str_case_e:nn #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} { } { }
  }
\cs_new:Npn \str_case_e:nnT #1#2#3
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} {#3} { }
  }
\cs_new:Npn \str_case_e:nnF #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2} { }
  }
\cs_new:Npn \str_case_e:nnTF #1#2
  {
    \exp:w
    \@@_case_e:nnTF {#1} {#2}
  }
\cs_new:Npn \@@_case_e:nnTF #1#2#3#4
  { \@@_case_e:nw {#1} #2 {#1} { } \s_@@_mark {#3} \s_@@_mark {#4} \s_@@_stop }
\cs_generate_variant:Nn \str_case_e:nn { e }
\prg_generate_conditional_variant:Nnn \str_case_e:nn { e } { T , F , TF }
\cs_new:Npn \@@_case_e:nw #1#2#3
  {
    \str_if_eq:eeTF {#1} {#2}
      { \@@_case_end:nw {#3} }
      { \@@_case_e:nw {#1} }
  }
%    \end{macrocode}
%   To tidy up the recursion, there are two outcomes. If there was a hit to
%   one of the cases searched for, then |#1| is the code to insert,
%   |#2| is the \emph{next} case to check on and |#3| is all of
%   the rest of the cases code. That means that |#4| is the \texttt{true}
%   branch code, and |#5| tidies up the spare \cs{s_@@_mark} and the
%   \texttt{false} branch. On the other hand, if none of the cases matched
%   then we arrive here using the \enquote{termination} case of comparing
%   the search with itself. That means that |#1| is empty, |#2| is
%   the first \cs{s_@@_mark} and so |#4| is the \texttt{false} code (the
%   \texttt{true} code is mopped up by |#3|).
%   In both situations the \cs{exp_end:} stops the expansion started by
%   the \cs{exp:w} of the user-level function.
%    \begin{macrocode}
\cs_new:Npn \@@_case_end:nw #1#2#3 \s_@@_mark #4#5 \s_@@_stop
  { \exp_end: #1 #4 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Mapping over strings}
%
% \begin{macro}[rEXP]{\str_map_function:NN, \str_map_function:cN}
% \begin{macro}[rEXP]{\str_map_function:nN}
% \begin{macro}{\str_map_inline:Nn, \str_map_inline:cn}
% \begin{macro}{\str_map_inline:nn}
% \begin{macro}{\str_map_variable:NNn, \str_map_variable:cNn}
% \begin{macro}{\str_map_variable:nNn}
% \begin{macro}{\str_map_break:}
% \begin{macro}{\str_map_break:n}
% \begin{macro}[rEXP]{\@@_map_function:w, \@@_map_function:nn}
% \begin{macro}{\@@_map_inline:NN, \@@_map_variable:NnN}
%   The inline and variable mappings are similar to the usual token list
%   mappings but start out by turning the argument to an ``other
%   string''.  Doing the same for the expandable function mapping would
%   require \cs{__kernel_str_to_other:n}, quadratic in the string length.  To deal
%   with spaces in that case, \cs{@@_map_function:w} replaces the
%   following space by a braced space and a further call to itself.
%   These are received by \cs{@@_map_function:nn}, which passes
%   the space to |#1| and calls \cs{@@_map_function:w} to deal with the
%   next space.  The space before the braced space makes it possible to
%   optimize the \cs{q_@@_recursion_tail} test.  Of course we need to
%   include a trailing space (the question mark is needed to avoid losing
%   the space when \TeX{} tokenizes the line).
%   At the cost of about three more auxiliaries this code could get a $9$
%   times speed up by testing only every $9$-th character for whether it
%   is \cs{q_@@_recursion_tail} (also by converting $9$ spaces at a time in
%   the \cs{str_map_function:nN} case).
%
%   For the inline mapping the user code is stored in a globally defined
%   function whose name is built from \cs{g__kernel_prg_map_int}; that
%   integer is incremented before the loop and decremented at the break
%   point.
%
%   For the \texttt{map_variable} functions we use a string assignment
%   to store each character because spaces are made catcode~$12$ before
%   the loop.
%
%   The \texttt{N}-type functions expand their variable once and call
%   the corresponding \texttt{n}-type function.
%    \begin{macrocode}
\cs_new:Npn \str_map_function:nN #1#2
  {
    \exp_after:wN \@@_map_function:w
    \exp_after:wN \@@_map_function:nn \exp_after:wN #2
      \__kernel_tl_to_str:w {#1}
      \q_@@_recursion_tail ? ~
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new:Npn \str_map_function:NN
  { \exp_args:No \str_map_function:nN }
\cs_new:Npn \@@_map_function:w #1 ~
  { #1 { ~ { ~ } \@@_map_function:w } }
\cs_new:Npn \@@_map_function:nn #1#2
  {
    \if_meaning:w \q_@@_recursion_tail #2
      \exp_after:wN \str_map_break:
    \fi:
    #1 #2 \@@_map_function:nn {#1}
  }
\cs_generate_variant:Nn \str_map_function:NN { c }
\cs_new_protected:Npn \str_map_inline:nn #1#2
  {
    \int_gincr:N \g__kernel_prg_map_int
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    \use:e
      {
        \exp_not:N \@@_map_inline:NN
        \exp_not:c { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
        \__kernel_str_to_other_fast:n {#1}
      }
      \q_@@_recursion_tail
    \prg_break_point:Nn \str_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
\cs_new_protected:Npn \str_map_inline:Nn
  { \exp_args:No \str_map_inline:nn }
\cs_generate_variant:Nn \str_map_inline:Nn { c }
\cs_new:Npn \@@_map_inline:NN #1#2
  {
    \@@_if_recursion_tail_break:NN #2 \str_map_break:
    \exp_args:No #1 { \token_to_str:N #2 }
    \@@_map_inline:NN #1
  }
\cs_new_protected:Npn \str_map_variable:nNn #1#2#3
  {
    \use:e
      {
        \exp_not:n { \@@_map_variable:NnN #2 {#3} }
        \__kernel_str_to_other_fast:n {#1}
      }
      \q_@@_recursion_tail
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new_protected:Npn \str_map_variable:NNn
  { \exp_args:No \str_map_variable:nNn }
\cs_new_protected:Npn \@@_map_variable:NnN #1#2#3
  {
    \@@_if_recursion_tail_break:NN #3 \str_map_break:
    \str_set:Nn #1 {#3}
    \use:n {#2}
    \@@_map_variable:NnN #1 {#2}
  }
\cs_generate_variant:Nn \str_map_variable:NNn { c }
\cs_new:Npn \str_map_break:
  { \prg_map_break:Nn \str_map_break: { } }
\cs_new:Npn \str_map_break:n
  { \prg_map_break:Nn \str_map_break: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[rEXP]{\str_map_tokens:Nn, \str_map_tokens:cn}
% \begin{macro}[rEXP]{\str_map_tokens:nn}
%   Uses an auxiliary of \cs{str_map_function:NN}.  The string |#1| is
%   converted by expanding \cs{__kernel_tl_to_str:w} once, and the tokens
%   |#2| are passed as a braced argument to \cs{@@_map_function:nn} where
%   the function mapping passes a single function.  The end of the
%   string is marked in the same way as for the function mapping.
%    \begin{macrocode}
\cs_new:Npn \str_map_tokens:nn #1#2
  {
    \exp_args:Nno \use:nn
      { \@@_map_function:w \@@_map_function:nn {#2} }
      { \__kernel_tl_to_str:w {#1} }
      \q_@@_recursion_tail ? ~
    \prg_break_point:Nn \str_map_break: { }
  }
\cs_new:Npn \str_map_tokens:Nn { \exp_args:No \str_map_tokens:nn }
\cs_generate_variant:Nn \str_map_tokens:Nn { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Accessing specific characters in a string}
%
% \begin{macro}[EXP]{\__kernel_str_to_other:n}
% \begin{macro}[EXP]{\@@_to_other_loop:w, \@@_to_other_end:w}
%   First apply \cs{tl_to_str:n}, then replace all spaces by
%   \enquote{other} spaces, $8$ at a time, storing the converted part of
%   the string between the \cs{s_@@_mark} and \cs{s_@@_stop} markers.  The end
%   is detected when \cs{@@_to_other_loop:w} finds one of the trailing
%   |A|, distinguished from any contents of the initial token list by
%   their category.  Then \cs{@@_to_other_end:w} is called, and finds
%   the result between \cs{s_@@_mark} and the first |A| (well, there is
%   also the need to remove a space).
%
%   The two auxiliaries are defined inside \cs{tex_lowercase:D} with the
%   lowercase code of |*| set to that of a space, so every |*| in their
%   definitions ends up as a space character which is not a
%   category-$10$ space.  The test for a trailing |A| is made on the
%   eighth delimited argument of \cs{@@_to_other_loop:w}.
%    \begin{macrocode}
\cs_new:Npn \__kernel_str_to_other:n #1
  {
    \exp_after:wN \@@_to_other_loop:w
      \tl_to_str:n {#1} ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ \s_@@_mark \s_@@_stop
  }
\group_begin:
\tex_lccode:D `\* = `\  %
\tex_lccode:D `\A = `\A %
\tex_lowercase:D
  {
    \group_end:
    \cs_new:Npn \@@_to_other_loop:w
      #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 \s_@@_stop
      {
        \if_meaning:w A #8
          \@@_to_other_end:w
        \fi:
        \@@_to_other_loop:w
        #9 #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * \s_@@_stop
      }
    \cs_new:Npn \@@_to_other_end:w \fi: #1 \s_@@_mark #2 * A #3 \s_@@_stop
      { \fi: #2 }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[rEXP]{\__kernel_str_to_other_fast:n}
% \begin{macro}[rEXP]{\@@_to_other_fast_loop:w, \@@_to_other_fast_end:w}
%   The difference with \cs{__kernel_str_to_other:n} is that the converted part is
%   left in the input stream, making these commands only
%   restricted-expandable.  Here nine |A| are appended after the string,
%   each step of \cs{@@_to_other_fast_loop:w} takes nine space-delimited
%   pieces, and the loop ends when the ninth piece is one of those |A|.
%   Then \cs{@@_to_other_fast_end:w} keeps what precedes the first
%   \enquote{other} space followed by |A| and discards the rest up to
%   \cs{s_@@_stop}.
%    \begin{macrocode}
\cs_new:Npn \__kernel_str_to_other_fast:n #1
  {
    \exp_after:wN \@@_to_other_fast_loop:w \tl_to_str:n {#1} ~
      A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ \s_@@_stop
  }
\group_begin:
\tex_lccode:D `\* = `\  %
\tex_lccode:D `\A = `\A %
\tex_lowercase:D
  {
    \group_end:
    \cs_new:Npn \@@_to_other_fast_loop:w
      #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 ~
      {
        \if_meaning:w A #9
          \@@_to_other_fast_end:w
        \fi:
        #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * #9
        \@@_to_other_fast_loop:w *
      }
    \cs_new:Npn \@@_to_other_fast_end:w #1 * A #2 \s_@@_stop {#1}
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_item:Nn, \str_item:cn, \str_item:nn, \str_item_ignore_spaces:nn}
% \begin{macro}[EXP]{\@@_item:nn, \@@_item:w}
%   The \cs{str_item:nn} hands its argument with spaces escaped to
%   \cs{@@_item:nn}, and makes sure to turn the result back into
%   a proper string (with category code~$10$ spaces) eventually.  The
%   \cs{str_item_ignore_spaces:nn} function does not escape spaces,
%   which are thus ignored by \cs{@@_item:nn} since
%   everything else is done with undelimited arguments.
%   Evaluate the \meta{index} argument~|#2| and count characters in
%   the string, passing those two numbers to \cs{@@_item:w} for
%   further analysis.  If the \meta{index} is negative, shift it by
%   the \meta{count} to know how many characters to discard, and if
%   that is still negative give an empty result.  If the \meta{index}
%   is larger than the \meta{count}, give an empty result, and
%   otherwise discard $\meta{index}-1$ characters before returning the
%   following one.  The shift by $-1$ is obtained by inserting an empty
%   brace group before the string in that case: that brace group also
%   covers the case where the \meta{index} is zero.
%   In \cs{@@_item:w}, |#1| is the evaluated \meta{index} and |#2| the
%   \meta{count}; the string itself follows in the input stream and is
%   terminated by \cs{s_@@_stop}.
%    \begin{macrocode}
\cs_new:Npn \str_item:Nn { \exp_args:No \str_item:nn }
\cs_generate_variant:Nn \str_item:Nn { c }
\cs_new:Npn \str_item:nn #1#2
  {
    \exp_args:Nf \tl_to_str:n
      {
        \exp_args:Nf \@@_item:nn
          { \__kernel_str_to_other:n {#1} } {#2}
      }
  }
\cs_new:Npn \str_item_ignore_spaces:nn #1
  { \exp_args:No \@@_item:nn { \tl_to_str:n {#1} } }
\cs_new:Npn \@@_item:nn #1#2
  {
    \exp_after:wN \@@_item:w
    \int_value:w \int_eval:n {#2} \exp_after:wN ;
    \int_value:w \@@_count:n {#1} ;
    #1 \s_@@_stop
  }
\cs_new:Npn \@@_item:w #1; #2;
  {
    \int_compare:nNnTF {#1} < 0
      {
        \int_compare:nNnTF {#1} < {-#2}
          { \@@_use_none_delimit_by_s_stop:w }
          {
            \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
            \exp:w \exp_after:wN \@@_skip_exp_end:w
              \int_value:w \int_eval:n { #1 + #2 } ;
          }
      }
      {
        \int_compare:nNnTF {#1} > {#2}
          { \@@_use_none_delimit_by_s_stop:w }
          {
            \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
            \exp:w \@@_skip_exp_end:w #1 ; { }
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_skip_exp_end:w}
% \begin{macro}[EXP]
%   {\@@_skip_loop:wNNNNNNNN, \@@_skip_end:w, \@@_skip_end:NNNNNNNN}
%   Removes |max(#1,0)| characters from the input stream, and then
%   leaves \cs{exp_end:}.  This should be expanded using
%   \cs{exp:w}.  We remove characters $8$ at a time until
%   there are at most $8$ to remove.  Then we do a dirty trick: the
%   \cs{if_case:w} construction leaves between $0$ and $8$ times the
%   \cs{or:} control sequence, and those \cs{or:} become arguments of
%   \cs{@@_skip_end:NNNNNNNN}.  If the number of characters to remove
%   is $6$, say, then there are two \cs{or:} left, and the $8$ arguments
%   of \cs{@@_skip_end:NNNNNNNN} are the two \cs{or:}, and $6$
%   characters from the input stream, exactly what we wanted to
%   remove. Then close the \cs{if_case:w} conditional with \cs{fi:}, and
%   stop the initial expansion with \cs{exp_end:} (see places where
%   \cs{@@_skip_exp_end:w} is called).
%
%   In \cs{@@_skip_exp_end:w} the number |#1| is written only once,
%   after the conditional: in the \cs{else:} branch the
%   \cs{exp_after:wN} triggers \cs{int_value:w} \cs{int_eval:w}, which
%   closes the conditional and reads the number before
%   \cs{@@_skip_end:w} is run.  In \cs{@@_skip_loop:wNNNNNNNN} the
%   arguments |#2| to |#9| are the eight characters being dropped.
%    \begin{macrocode}
\cs_new:Npn \@@_skip_exp_end:w #1;
  {
    \if_int_compare:w #1 > 8 \exp_stop_f:
      \exp_after:wN \@@_skip_loop:wNNNNNNNN
    \else:
      \exp_after:wN \@@_skip_end:w
      \int_value:w \int_eval:w
    \fi:
    #1 ;
  }
\cs_new:Npn \@@_skip_loop:wNNNNNNNN #1; #2#3#4#5#6#7#8#9
  {
    \exp_after:wN \@@_skip_exp_end:w
      \int_value:w \int_eval:n { #1 - 8 } ;
  }
\cs_new:Npn \@@_skip_end:w #1 ;
  {
    \exp_after:wN \@@_skip_end:NNNNNNNN
    \if_case:w #1 \exp_stop_f: \or: \or: \or: \or: \or: \or: \or: \or:
  }
\cs_new:Npn \@@_skip_end:NNNNNNNN #1#2#3#4#5#6#7#8 { \fi: \exp_end: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_range:Nnn, \str_range:nnn, \str_range_ignore_spaces:nnn}
% \begin{macro}[EXP]{\@@_range:nnn}
% \begin{macro}[EXP]{\@@_range:w, \@@_range:nnw}
%   Sanitize the string.  Then evaluate the arguments.  At this stage we
%   also decrement the \meta{start index}, since our goal is to know how
%   many characters should be removed.  Then limit the range to be
%   non-negative and at most the length of the string (this avoids
%   needing to check for the end of the string when grabbing
%   characters), shifting negative numbers by the appropriate amount.
%   Afterwards, skip characters, then keep some more, and finally drop
%   the end of the string.
%
%   The three numbers received by \cs{@@_range:w} are, in order, the
%   character count, the decremented \meta{start index} and the
%   \meta{end index}; the string follows, terminated by \cs{s_@@_stop}.
%   In \cs{@@_range:nnw}, |#1| is the normalized number of characters to
%   skip and |#2| the normalized end position, so that $|#2| - |#1|$
%   characters are collected after skipping |#1|.
%    \begin{macrocode}
\cs_new:Npn \str_range:Nnn { \exp_args:No \str_range:nnn }
\cs_generate_variant:Nn \str_range:Nnn { c }
\cs_new:Npn \str_range:nnn #1#2#3
  {
    \exp_args:Nf \tl_to_str:n
      {
        \exp_args:Nf \@@_range:nnn
          { \__kernel_str_to_other:n {#1} } {#2} {#3}
      }
  }
\cs_new:Npn \str_range_ignore_spaces:nnn #1
  { \exp_args:No \@@_range:nnn { \tl_to_str:n {#1} } }
\cs_new:Npn \@@_range:nnn #1#2#3
  {
    \exp_after:wN \@@_range:w
    \int_value:w \@@_count:n {#1} \exp_after:wN ;
    \int_value:w \int_eval:n { (#2) - 1 } \exp_after:wN ;
    \int_value:w \int_eval:n {#3} ;
    #1 \s_@@_stop
  }
\cs_new:Npn \@@_range:w #1; #2; #3;
  {
    \exp_args:Nf \@@_range:nnw
      { \@@_range_normalize:nn {#2} {#1} }
      { \@@_range_normalize:nn {#3} {#1} }
  }
\cs_new:Npn \@@_range:nnw #1#2
  {
    \exp_after:wN \@@_collect_delimit_by_q_stop:w
    \int_value:w \int_eval:n { #2 - #1 } \exp_after:wN ;
    \exp:w \@@_skip_exp_end:w #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \begin{macro}[EXP]{\@@_range_normalize:nn}
%   Turns an \meta{index} into an explicit position within the string,
%   where $0$ stands for \enquote{out of bounds}.  Both arguments must
%   be explicit integers: |#1| is the \meta{index} and |#2| the string
%   count.  A negative \meta{index} counts from the end of the string,
%   so it is mapped to $|#1| + |#2| + 1$ and kept from dropping
%   below~$0$; a non-negative \meta{index} is capped at~|#2|.  The
%   result always lies in the range $[0, |#2|]$.
%    \begin{macrocode}
\cs_new:Npn \@@_range_normalize:nn #1#2
  {
    \int_eval:n
      {
        \int_compare:nNnTF {#1} < { 0 }
          { \int_max:nn { 0 } { #1 + #2 + 1 } }
          { \int_min:nn {#1} {#2} }
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}[EXP]{\@@_collect_delimit_by_q_stop:w}
% \begin{macro}[EXP]
%   {
%     \@@_collect_loop:wn, \@@_collect_loop:wnNNNNNNN,
%     \@@_collect_end:wn, \@@_collect_end:nnnnnnnnw
%   }
%   Collects |max(#1,0)| characters, and removes everything else until
%   \cs{s_@@_stop}. This is somewhat similar to \cs{@@_skip_exp_end:w}, but
%   accepts integer expression arguments.  This time we can only grab
%   $7$ characters at a time.  At the end, we use an \cs{if_case:w}
%   trick again, so that the $8$ first arguments of
%   \cs{@@_collect_end:nnnnnnnnw} are some \cs{or:}, followed by an
%   \cs{fi:}, followed by |#1| characters from the input stream. Simply
%   leaving this in the input stream closes the conditional properly
%   and the \cs{or:} disappear.
%    \begin{macrocode}
\cs_new:Npn \@@_collect_delimit_by_q_stop:w #1;
  { \@@_collect_loop:wn #1 ; { } }
%    \end{macrocode}
%   The brace group placed after the count, empty at first, holds the
%   characters collected so far.  While more than $7$ characters remain
%   to be collected, the loop step moves the next $7$ tokens into that
%   group and lowers the count by~$7$.
%    \begin{macrocode}
\cs_new:Npn \@@_collect_loop:wn #1 ;
  {
    \if_int_compare:w #1 > 7 \exp_stop_f:
      \exp_after:wN \@@_collect_loop:wnNNNNNNN
    \else:
      \exp_after:wN \@@_collect_end:wn
    \fi:
    #1 ;
  }
\cs_new:Npn \@@_collect_loop:wnNNNNNNN #1; #2 #3#4#5#6#7#8#9
  {
    \exp_after:wN \@@_collect_loop:wn
    \int_value:w \int_eval:n { #1 - 7 } ;
    { #2 #3#4#5#6#7#8#9 }
  }
%    \end{macrocode}
%   In the final step the count is first replaced by~$0$ if it is not
%   positive.  Six \cs{or:} and one \cs{fi:} follow the
%   \cs{if_case:w}, and the group of collected characters comes next
%   in the input.  The eight undelimited arguments of
%   \cs{@@_collect_end:nnnnnnnnw} are put back unchanged, while~|#9|,
%   the rest of the string up to \cs{s_@@_stop}, is discarded.
%    \begin{macrocode}
\cs_new:Npn \@@_collect_end:wn #1 ;
  {
    \exp_after:wN \@@_collect_end:nnnnnnnnw
    \if_case:w \if_int_compare:w #1 > \c_zero_int
      #1 \else: 0 \fi: \exp_stop_f:
      \or: \or: \or: \or: \or: \or: \fi:
  }
\cs_new:Npn \@@_collect_end:nnnnnnnnw #1#2#3#4#5#6#7#8 #9 \s_@@_stop
  { #1#2#3#4#5#6#7#8 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Counting characters}
%
% \begin{macro}[EXP]
%   {\str_count_spaces:N, \str_count_spaces:c, \str_count_spaces:n}
% \begin{macro}[EXP]{\@@_count_spaces_loop:w}
%   To speed up this function, we grab and discard $9$ space-delimited
%   arguments in each iteration of the loop.  The loop stops when the
%   last argument is one of the trailing |X|\meta{number}, and that
%   \meta{number} is added to the sum of $9$ that precedes, to adjust
%   the result.
%    \begin{macrocode}
\cs_new:Npn \str_count_spaces:N
  { \exp_args:No \str_count_spaces:n }
\cs_generate_variant:Nn \str_count_spaces:N { c }
\cs_new:Npn \str_count_spaces:n #1
  {
    \int_eval:n
      {
        \exp_after:wN \@@_count_spaces_loop:w
        \tl_to_str:n {#1} ~
        X 7 ~ X 6 ~ X 5 ~ X 4 ~ X 3 ~ X 2 ~ X 1 ~ X 0 ~ X -1 ~
        \s_@@_stop
      }
  }
%    \end{macrocode}
%   In each step |#9| is the ninth space-delimited item.  When it is
%   one of the trailing markers, \cs{if_meaning:w} compares the
%   two~|X| and the \meta{number} that follows stays in the input as
%   the last term of the sum evaluated by \cs{int_eval:n}; the true
%   branch then calls \cs{@@_use_i_delimit_by_s_stop:nw} to end the
%   loop.  In all other cases $9$ is added to the sum and the loop
%   continues with the next nine items.
%    \begin{macrocode}
\cs_new:Npn \@@_count_spaces_loop:w #1~#2~#3~#4~#5~#6~#7~#8~#9~
  {
    \if_meaning:w X #9
      \@@_use_i_delimit_by_s_stop:nw
    \fi:
    9 + \@@_count_spaces_loop:w
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_count:N, \str_count:c, \str_count:n, \str_count_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_count:n}
% \begin{macro}[EXP]{\@@_count_aux:n, \@@_count_loop:NNNNNNNNN}
%   To count characters in a string we could first escape all spaces
%   using \cs{__kernel_str_to_other:n}, then pass the result to \cs{tl_count:n}.
%   However, the escaping step would be quadratic in the number of
%   characters in the string, and we can do better.  Namely, sum the
%   number of spaces (\cs{str_count_spaces:n}) and the result of
%   \cs{tl_count:n}, which ignores spaces.  Since strings tend to be
%   longer than token lists, we use specialized functions to count
%   characters ignoring spaces.  Namely, loop, grabbing $9$ non-space
%   characters at each step, and end as soon as we reach one of the $9$
%   trailing items.  The internal function \cs{@@_count:n}, used in
%   \cs{str_item:nn} and \cs{str_range:nnn}, is similar to
%   \cs{str_count_ignore_spaces:n} but expects its argument to already
%   be a string or a string with spaces escaped.
%    \begin{macrocode}
\cs_new:Npn \str_count:N { \exp_args:No \str_count:n }
\cs_generate_variant:Nn \str_count:N { c }
\cs_new:Npn \str_count:n #1
  {
    \@@_count_aux:n
      {
        \str_count_spaces:n {#1}
        + \exp_after:wN \@@_count_loop:NNNNNNNNN \tl_to_str:n {#1}
      }
  }
%    \end{macrocode}
%   The internal version passes its argument to the loop as it
%   stands, while the \texttt{_ignore_spaces} version converts it with
%   \cs{tl_to_str:n} first.  Neither adds the number of spaces.
%    \begin{macrocode}
\cs_new:Npn \@@_count:n #1
  {
    \@@_count_aux:n
      { \@@_count_loop:NNNNNNNNN #1 }
  }
\cs_new:Npn \str_count_ignore_spaces:n #1
  {
    \@@_count_aux:n
      { \exp_after:wN \@@_count_loop:NNNNNNNNN \tl_to_str:n {#1} }
  }
%    \end{macrocode}
%   The shared auxiliary wraps the loop in \cs{int_eval:n} and appends
%   nine braced markers |X|\meta{number} followed by \cs{s_@@_stop}.
%   Each step of the loop grabs nine undelimited arguments.  When the
%   ninth is a marker, its \meta{number} is left as the last term of
%   the sum and \cs{@@_use_none_delimit_by_s_stop:w} is called to end
%   the loop; otherwise $9$ is added and the loop continues.
%    \begin{macrocode}
\cs_new:Npn \@@_count_aux:n #1
  {
    \int_eval:n
      {
        #1
        { X 8 } { X 7 } { X 6 }
        { X 5 } { X 4 } { X 3 }
        { X 2 } { X 1 } { X 0 }
        \s_@@_stop
      }
  }
\cs_new:Npn \@@_count_loop:NNNNNNNNN #1#2#3#4#5#6#7#8#9
  {
    \if_meaning:w X #9
      \exp_after:wN \@@_use_none_delimit_by_s_stop:w
    \fi:
    9 + \@@_count_loop:NNNNNNNNN
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{The first character in a string}
%
% \begin{macro}[EXP]
%   {\str_head:N, \str_head:c, \str_head:n, \str_head_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_head:w}
%   The \texttt{_ignore_spaces} variant applies \cs{tl_to_str:n} then
%   grabs the first item, thus skipping spaces.
%   As usual, \cs{str_head:N} expands its argument and
%   hands it to \cs{str_head:n}.  To circumvent the fact that \TeX{}
%   skips spaces when grabbing undelimited macro parameters,
%   \cs{@@_head:w} takes an argument delimited by a space. If |#1|
%   starts with a non-space character, \cs{@@_use_i_delimit_by_s_stop:nw}
%   leaves that in the input stream. On the other hand, if |#1| starts
%   with a space, the \cs{@@_head:w} takes an empty argument, and the
%   single (initially braced) space in the definition of \cs{@@_head:w}
%   makes its way to the output. Finally, for an empty argument, the
%   (braced) empty brace group in the definition of \cs{str_head:n}
%   gives an empty result after passing through
%   \cs{@@_use_i_delimit_by_s_stop:nw}.
%    \begin{macrocode}
\cs_new:Npn \str_head:N { \exp_args:No \str_head:n }
\cs_generate_variant:Nn \str_head:N { c }
\cs_new:Npn \str_head:n #1
  {
    \exp_after:wN \@@_head:w
    \tl_to_str:n {#1}
    { { } } ~ \s_@@_stop
  }
%    \end{macrocode}
%   The argument~|#1| of \cs{@@_head:w} is whatever precedes the first
%   space found in the input.  It is put back in front of a braced
%   space, so that \cs{@@_use_i_delimit_by_s_stop:nw} finds either the
%   first character of~|#1| or, if |#1| is empty, that braced space.
%    \begin{macrocode}
\cs_new:Npn \@@_head:w #1 ~ %
  { \@@_use_i_delimit_by_s_stop:nw #1 { ~ } }
%    \end{macrocode}
%   No space-delimited step is needed when spaces are ignored: the
%   string is directly followed by an empty brace group, which
%   provides the argument when the string has no non-space character.
%    \begin{macrocode}
\cs_new:Npn \str_head_ignore_spaces:n #1
  {
    \exp_after:wN \@@_use_i_delimit_by_s_stop:nw
    \tl_to_str:n {#1} { } \s_@@_stop
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {\str_tail:N, \str_tail:c, \str_tail:n, \str_tail_ignore_spaces:n}
% \begin{macro}[EXP]{\@@_tail_auxi:w, \@@_tail_auxii:w}
%   Getting the tail is a little bit more convoluted than the head of a
%   string.  We hit the front of the string with \cs{reverse_if:N}
%   \cs{if_charcode:w} \cs{scan_stop:}.  This removes the first
%   character, and necessarily makes the test true, since the character
%   cannot match \cs{scan_stop:}. The auxiliary function then inserts
%   the required \cs{fi:} to close the conditional, and leaves the tail
%   of the string in the input stream.  The details are such that an
%   empty string has an empty tail (this requires in particular that the
%   end-marker |X| be unexpandable and not a control sequence).  The
%   \texttt{_ignore_spaces} variant is rather simpler: after converting the
%   input to a string, \cs{@@_tail_auxii:w} removes one undelimited
%   argument and leaves everything else until an end-marker \cs{s_@@_mark}.
%   One can check that an empty (or blank) string yields an empty
%   tail.
%    \begin{macrocode}
\cs_new:Npn \str_tail:N { \exp_args:No \str_tail:n }
\cs_generate_variant:Nn \str_tail:N { c }
\cs_new:Npn \str_tail:n #1
  {
    \exp_after:wN \@@_tail_auxi:w
    \reverse_if:N \if_charcode:w
        \scan_stop: \tl_to_str:n {#1} X X \s_@@_stop
  }
%    \end{macrocode}
%   Two end-markers~|X| are placed after the string because the
%   conditional consumes the first one when the string is empty.  In
%   \cs{@@_tail_auxi:w}, |#1| is what remains of the string before the
%   next~|X|, and |#2| absorbs whatever is left before
%   \cs{s_@@_stop}.
%    \begin{macrocode}
\cs_new:Npn \@@_tail_auxi:w #1 X #2 \s_@@_stop { \fi: #1 }
%    \end{macrocode}
%   Similarly, two \cs{s_@@_mark} are needed for the
%   \texttt{_ignore_spaces} version: if the string has no non-space
%   character, the undelimited argument~|#1| of \cs{@@_tail_auxii:w}
%   is the first \cs{s_@@_mark} and |#2| is empty.
%    \begin{macrocode}
\cs_new:Npn \str_tail_ignore_spaces:n #1
  {
    \exp_after:wN \@@_tail_auxii:w
    \tl_to_str:n {#1} \s_@@_mark \s_@@_mark \s_@@_stop
  }
\cs_new:Npn \@@_tail_auxii:w #1 #2 \s_@@_mark #3 \s_@@_stop { #2 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{String manipulation}
%
% \begin{macro}[EXP]
%   {
%     \str_casefold:n, \str_casefold:V,
%     \str_lowercase:n, \str_lowercase:f,
%     \str_uppercase:n, \str_uppercase:f
%   }
% \begin{macro}[EXP]{\@@_change_case:nn}
% \begin{macro}[EXP]{\@@_change_case_aux:nn}
% \begin{macro}[EXP]{\@@_change_case_result:n}
% \begin{macro}[EXP]{\@@_change_case_output:nw, \@@_change_case_output:fw}
% \begin{macro}[EXP]{\@@_change_case_end:wn}
% \begin{macro}[EXP]{\@@_change_case_loop:nw}
% \begin{macro}[EXP]{\@@_change_case_space:n}
% \begin{macro}[EXP]
%   {\@@_change_case_char:nN, \@@_change_case_char_auxi:nN, \@@_change_case_char_auxii:nN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNNN}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nNNNN}
% \begin{macro}[EXP]{\@@_change_case_char:nnn, \@@_change_case_char_aux:nnn}
% \begin{macro}[EXP]{\@@_change_case_char:nnnnn}
%   Case changing for programmatic reasons is done by first detokenizing
%   input then doing a simple loop that only has to worry about spaces
%   and everything else. The output is detokenized to allow data sharing
%   with text-based case changing. Similarly, for $8$-bit engines the
%   multi-byte information is shared.
%    \begin{macrocode}
\cs_new:Npn \str_casefold:n  #1 { \@@_change_case:nn {#1} { casefold } }
\cs_new:Npn \str_lowercase:n #1 { \@@_change_case:nn {#1} { lowercase } }
\cs_new:Npn \str_uppercase:n #1 { \@@_change_case:nn {#1} { uppercase } }
\cs_generate_variant:Nn \str_casefold:n  { V }
\cs_generate_variant:Nn \str_lowercase:n { f }
\cs_generate_variant:Nn \str_uppercase:n { f }
%    \end{macrocode}
%   The first auxiliary only grabs the input: it expands
%   \cs{tl_to_str:n} inside the braces and leaves the case type in
%   place, to be picked up as~|#2| by \cs{@@_change_case_aux:nn}.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case:nn #1
  {
    \exp_after:wN \@@_change_case_aux:nn \exp_after:wN
      { \tl_to_str:n {#1} }
  }
%    \end{macrocode}
%   The loop is started with the case type, then the string followed
%   by the two recursion quarks.  After these comes
%   \cs{@@_change_case_result:n} with an argument, empty at first, in
%   which the output is accumulated.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case_aux:nn #1#2
  {
    \@@_change_case_loop:nw {#2} #1 \q_@@_recursion_tail \q_@@_recursion_stop
      \@@_change_case_result:n { }
  }
%    \end{macrocode}
%   To output~|#1|, everything up to \cs{@@_change_case_result:n} is
%   put back unchanged and |#1| is appended to the accumulated result.
%   At the end, what precedes \cs{@@_change_case_result:n} is dropped
%   and the result is delivered as a string.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case_output:nw #1#2 \@@_change_case_result:n #3
  { #2 \@@_change_case_result:n { #3 #1 } }
\cs_generate_variant:Nn  \@@_change_case_output:nw { f }
\cs_new:Npn \@@_change_case_end:wn #1 \@@_change_case_result:n #2
  { \tl_to_str:n {#2} }
\cs_new:Npn \@@_change_case_loop:nw #1#2 \q_@@_recursion_stop
  {
    \tl_if_head_is_space:nTF {#2}
      { \@@_change_case_space:n }
      { \@@_change_case_char:nN }
    {#1} #2 \q_@@_recursion_stop
  }
%    \end{macrocode}
%   The definition of the space step goes through
%   \cs{exp_last_unbraced:NNNNo} so that \cs{c_space_tl} is expanded
%   before \cs{cs_new:Npn} acts: the content of that variable
%   (presumably a single explicit space) thus ends the parameter text
%   and is removed from the input when the function is used.  A space
%   is then added to the output and the loop resumes with the case
%   type~|#1|.
%    \begin{macrocode}
\exp_last_unbraced:NNNNo
  \cs_new:Npn \@@_change_case_space:n #1 \c_space_tl
  {
    \@@_change_case_output:nw { ~ }
    \@@_change_case_loop:nw {#1}
  }
%    \end{macrocode}
%   For any other token~|#2|, first test for the end of the
%   recursion, in which case \cs{@@_change_case_end:wn} is inserted to
%   deliver the result.  Otherwise the token is handed, with the case
%   type~|#1|, to the engine-dependent \cs{@@_change_case_codepoint:nN}.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case_char:nN #1#2
  {
    \@@_if_recursion_tail_stop_do:Nn #2
      { \@@_change_case_end:wn }
    \@@_change_case_codepoint:nN {#1} #2
  }
\if_int_compare:w 0
  \cs_if_exist:NT \tex_XeTeXversion:D { 1 }
  \cs_if_exist:NT \tex_luatexversion:D { 1 }
  > 0 \exp_stop_f:
  \cs_new:Npn \@@_change_case_codepoint:nN #1#2
    { \@@_change_case_char:fnn { \int_eval:n {`#2} } {#1} {#2} }
\else:
%    \end{macrocode}
%   The branch above is taken when \cs{tex_XeTeXversion:D} or
%   \cs{tex_luatexversion:D} exists: the character code of the single
%   token~|#2| is then used directly as the codepoint.  With other
%   engines a character may be a UTF-8 sequence of several tokens.
%   The dispatcher is defined with \cs{cs_new:Npe} so that the test
%   for \cs{tex_pdftexversion:D} is resolved once, at definition time.
%   A first token whose character code is at most |"80| is always
%   treated on its own.  Above that value it is taken as the start of
%   a multi-byte sequence, except that when \cs{tex_pdftexversion:D}
%   does not exist a character code above |"FF| is again treated on
%   its own.
%    \begin{macrocode}
    \cs_new:Npe \@@_change_case_codepoint:nN #1#2
      {
        \exp_not:N \int_compare:nNnTF {`#2} > { "80 }
          {
            \cs_if_exist:NTF \tex_pdftexversion:D
              { \exp_not:N \@@_change_case_char_auxi:nN }
              {
                \exp_not:N \int_compare:nNnTF {`#2} > { "FF }
                  { \exp_not:N \@@_change_case_char_auxii:nN }
                  { \exp_not:N \@@_change_case_char_auxi:nN }
              }
          }
          { \exp_not:N \@@_change_case_char_auxii:nN }
            {#1} #2
      }
%    \end{macrocode}
%   The leading byte tells how many tokens make up the character: two
%   if it is below |"E0|, three if it is below |"F0|, and four
%   otherwise.  The auxiliary called in each case takes the case type
%   as an \texttt{n}-type argument followed by one \texttt{N}-type
%   argument per byte, hence \cs{@@_change_case_codepoint:nNNNN} for
%   a four-byte sequence.
%    \begin{macrocode}
    \cs_new:Npn \@@_change_case_char_auxi:nN #1#2
      {
        \int_compare:nNnTF {`#2} < { "E0 }
          { \@@_change_case_codepoint:nNN }
          {
            \int_compare:nNnTF {`#2} < { "F0 }
              { \@@_change_case_codepoint:nNNN }
              { \@@_change_case_codepoint:nNNNN }
          }
            {#1} #2
      }
    \cs_new:Npn \@@_change_case_char_auxii:nN #1#2
      { \@@_change_case_char:fnn { \int_eval:n {`#2} } {#1} {#2} }
%    \end{macrocode}
%   Each of the following auxiliaries rebuilds the codepoint from the
%   bytes of the sequence, and passes it on together with the case
%   type~|#1| and the original tokens.
%    \begin{macrocode}
    \cs_new:Npn \@@_change_case_codepoint:nNN #1#2#3
      {
        \@@_change_case_char:fnn
          { \int_eval:n { (`#2 - "C0) * "40 + `#3 - "80 } }
          {#1} {#2#3}
      }
    \cs_new:Npn \@@_change_case_codepoint:nNNN #1#2#3#4
      {
        \@@_change_case_char:fnn
          {
            \int_eval:n
              { (`#2 - "E0) * "1000 + (`#3 - "80) * "40 + `#4 - "80 }
          }
          {#1} {#2#3#4}
      }
    \cs_new:Npn \@@_change_case_codepoint:nNNNN #1#2#3#4#5
      {
        \@@_change_case_char:fnn
          {
            \int_eval:n
              {
                  (`#2 - "F0) * "40000
                + (`#3 - "80) * "1000
                + (`#4 - "80) * "40
                + `#5 - "80
              }
          }
          {#1} {#2#3#4#5}
      }
\fi:
\cs_new:Npn \@@_change_case_char:nnn #1#2#3
  {
    \@@_change_case_output:fw
      {
        \exp_args:Ne \@@_change_case_char_aux:nnn
          { \__kernel_codepoint_case:nn {#2} {#1} } {#1} {#3}
      }
    \@@_change_case_loop:nw {#2}
  }
\cs_generate_variant:Nn \@@_change_case_char:nnn { f }
%    \end{macrocode}
%   Above, |#1| is the codepoint, |#2| the case type and |#3| the
%   original tokens of the character.  The expansion of
%   \cs{__kernel_codepoint_case:nn} is inserted unbraced below, where
%   it provides the first three arguments of
%   \cs{@@_change_case_char:nnnnn}; the codepoint and the original
%   tokens are the fourth and fifth.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case_char_aux:nnn #1#2#3
  {
    \use:e { \@@_change_case_char:nnnnn #1 {#2} {#3} }
  }
%    \end{macrocode}
%   If the first codepoint of the mapping equals the original
%   codepoint~|#4|, the original tokens~|#5| are output as a string.
%   Otherwise a string is generated from the codepoint~|#1|, then
%   from |#2| unless it is blank, and in that case also from |#3|
%   unless it is blank.
%    \begin{macrocode}
\cs_new:Npn \@@_change_case_char:nnnnn #1#2#3#4#5
  {
    \int_compare:nNnTF {#1} = {#4}
      { \tl_to_str:n {#5} }
      {
        \codepoint_str_generate:n {#1}
        \tl_if_blank:nF {#2}
          {
            \codepoint_str_generate:n {#2}
            \tl_if_blank:nF {#3}
              { \codepoint_str_generate:n {#3} }
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\str_mdfive_hash:n, \str_mdfive_hash:e}
%   Both functions hand their argument to the \cs{tex_mdfivesum:D}
%   primitive.  The \texttt{n}-type version wraps the argument in
%   \cs{tl_to_str:n}, whereas the \texttt{e}-type version, defined
%   directly rather than as a generated variant, passes it unchanged.
%    \begin{macrocode}
\cs_new:Npn \str_mdfive_hash:n #1 { \tex_mdfivesum:D { \tl_to_str:n {#1} } }
\cs_new:Npn \str_mdfive_hash:e #1 { \tex_mdfivesum:D {#1} }
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}
%   {
%     \c_ampersand_str,
%     \c_atsign_str,
%     \c_backslash_str,
%     \c_left_brace_str,
%     \c_right_brace_str,
%     \c_circumflex_str,
%     \c_colon_str,
%     \c_dollar_str,
%     \c_hash_str,
%     \c_percent_str,
%     \c_tilde_str,
%     \c_underscore_str,
%     \c_zero_str
%   }
%   For all of those strings, use \cs{cs_to_str:N} to get characters with
%   the correct category code without worries.  The only exception is
%   \cs{c_zero_str}, for which the digit~|0| is given directly.
%    \begin{macrocode}
\str_const:Ne \c_ampersand_str   { \cs_to_str:N \& }
\str_const:Ne \c_atsign_str      { \cs_to_str:N \@ }
\str_const:Ne \c_backslash_str   { \cs_to_str:N \\ }
\str_const:Ne \c_left_brace_str  { \cs_to_str:N \{ }
\str_const:Ne \c_right_brace_str { \cs_to_str:N \} }
\str_const:Ne \c_circumflex_str  { \cs_to_str:N \^ }
\str_const:Ne \c_colon_str       { \cs_to_str:N \: }
\str_const:Ne \c_dollar_str      { \cs_to_str:N \$ }
\str_const:Ne \c_hash_str        { \cs_to_str:N \# }
\str_const:Ne \c_percent_str     { \cs_to_str:N \% }
\str_const:Ne \c_tilde_str       { \cs_to_str:N \~ }
\str_const:Ne \c_underscore_str  { \cs_to_str:N \_ }
\str_const:Ne \c_zero_str        { 0 }
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\c_empty_str}
%   An empty string is simply an empty token list, so the constant is
%   created as a copy of \cs{c_empty_tl}.
%    \begin{macrocode}
\cs_new_eq:NN \c_empty_str \c_empty_tl
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_tmpa_str, \l_tmpb_str, \g_tmpa_str, \g_tmpb_str}
%   Scratch strings, all four created with \cs{str_new:N} and hence
%   initially empty.
%    \begin{macrocode}
\str_new:N \l_tmpa_str
\str_new:N \l_tmpb_str
\str_new:N \g_tmpa_str
\str_new:N \g_tmpb_str
%    \end{macrocode}
% \end{variable}
%
% \subsection{Viewing strings}
%
% \begin{macro}{\str_show:n, \str_show:N, \str_show:c}
% \begin{macro}{\str_log:n, \str_log:N, \str_log:c}
%   Displays a string on the terminal; the \texttt{log} functions are
%   built in the same way on top of \cs{tl_log:n} and \cs{tl_log:N}.
%   The \texttt{n}-type functions are plain copies of \cs{tl_show:n}
%   and \cs{tl_log:n}.  The \texttt{N}-type functions go through
%   \cs{__kernel_chk_tl_type:NnnT}, which receives the variable, the
%   type name |str| and the \cs{tl_to_str:N} form of the variable
%   (presumably to check that the variable is a valid string), and
%   only call \cs{tl_show:N} or \cs{tl_log:N} in its true branch.
%    \begin{macrocode}
\cs_new_eq:NN \str_show:n \tl_show:n
\cs_new_protected:Npn \str_show:N #1
  {
    \__kernel_chk_tl_type:NnnT #1 { str } { \tl_to_str:N #1 }
      { \tl_show:N #1 }
  }
\cs_generate_variant:Nn \str_show:N { c }
\cs_new_eq:NN \str_log:n \tl_log:n
\cs_new_protected:Npn \str_log:N #1
  {
    \__kernel_chk_tl_type:NnnT #1 { str } { \tl_to_str:N #1 }
      { \tl_log:N #1 }
  }
\cs_generate_variant:Nn \str_log:N { c }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
%    \begin{macrocode}
%</package>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex