f5f67e2e80
These calls are now replaced with explicit UTF conversion routines in the common/string_calls.[hc] and common/parse.[hc] modules. Also removed:- - The support code in common/os_calls.c to set the locale to use these routines. - The twchar type in arch.h
407 lines
13 KiB
C
407 lines
13 KiB
C
/**
|
|
* xrdp: A Remote Desktop Protocol server.
|
|
*
|
|
* Copyright (C) Jay Sorg 2004-2020
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
* generic string handling calls
|
|
*/
|
|
|
|
#if !defined(STRING_CALLS_H)
|
|
#define STRING_CALLS_H
|
|
|
|
#include "arch.h"
|
|
|
|
/**
|
|
* Map a character to a string value
|
|
*
|
|
* This structure is used by g_format_info_string() to specify the
|
|
* string which could be output for %'ch', where ch is a character
|
|
*/
|
|
struct info_string_tag
|
|
{
|
|
char ch;
|
|
const char *val;
|
|
};
|
|
|
|
#define INFO_STRING_END_OF_LIST { '\0', NULL }
|
|
|
|
/**
|
|
* Map a bitmask to a string value
|
|
*
|
|
*
|
|
* This structure is used by g_bitmask_to_str() to specify the
|
|
* string for each bit in the bitmask
|
|
*/
|
|
struct bitmask_string
|
|
{
|
|
int mask;
|
|
const char *str;
|
|
};
|
|
|
|
#define BITMASK_STRING_END_OF_LIST { 0, NULL }
|
|
|
|
/**
|
|
* Map a bitmask to a char value
|
|
*
|
|
*
|
|
* This structure is used by g_bitmask_to_charstr() to specify the
|
|
* char for each bit in the bitmask
|
|
*/
|
|
struct bitmask_char
|
|
{
|
|
int mask;
|
|
char c;
|
|
};
|
|
|
|
#define BITMASK_CHAR_END_OF_LIST { 0, '\0' }
|
|
|
|
enum
|
|
{
|
|
// See g_sig2text()
|
|
// Must be able to hold "SIG#%d" for INT_MIN
|
|
//
|
|
// ((sizeof(int) * 5 + 1) / 2) provides a very slight overestimate of
|
|
// the bytes requires to store a decimal expansion of 'int':-
|
|
// sizeof INT_MAX display bytes ((sizeof(int) * 5 + 1)
|
|
// (int) needed / 2)
|
|
// ------ ------- ------------- ---------------------------
|
|
// 1 127 3 3
|
|
// 2 32767 5 5
|
|
// 3 8388607 7 8
|
|
// 4 2147483637 10 10
|
|
// 8 9*(10**18) 19 20
|
|
// 16 2*(10**38) 39 40
|
|
// 32 6*(10**76) 77 80
|
|
MAXSTRSIGLEN = (3 + 1 + 1 + ((sizeof(int) * 5 + 1) / 2) + 1)
|
|
};
|
|
|
|
/*
|
|
* Significant Universal Character Set (Unicode) characters
|
|
*/
|
|
enum
|
|
{
|
|
UCS_WHITE_SQUARE = 0x25a1,
|
|
UCS_REPLACEMENT_CHARACTER = 0xfffd
|
|
};
|
|
|
|
/**
|
|
* Processes a format string for general info
|
|
*
|
|
* @param[out] dest Destination buffer
|
|
* @param[in] len Length of buffer, including space for a terminator
|
|
* @param[in] format Format string to process
|
|
* @param[in] map Array of struct info_string_tag.
|
|
*
|
|
* Where a '%<ch>' is encountered in the format string, the map is scanned
|
|
* and the corresponding string is copied instead of '%<ch>'.
|
|
*
|
|
* '%%' is always replaced with a single '%' in the output. %<ch> strings
|
|
* not present in the map are ignored.
|
|
*
|
|
* The map is terminated with INFO_STRING_END_OF_LIST
|
|
*
|
|
* Caller can check for buffer truncation by comparing the result with
|
|
* the buffer length (as in snprintf())
|
|
*/
|
|
unsigned int
|
|
g_format_info_string(char *dest, unsigned int len,
|
|
const char *format,
|
|
const struct info_string_tag map[]);
|
|
|
|
|
|
/**
|
|
* Converts a boolean to a string for output
|
|
*
|
|
* @param[in] value Value to convert
|
|
* @return String representation
|
|
*/
|
|
const char *
|
|
g_bool2text(int value);
|
|
|
|
/**
|
|
* Converts a string to a boolean value
|
|
*
|
|
* @param[in] s String to convert
|
|
* @return machine representation
|
|
*/
|
|
int
|
|
g_text2bool(const char *s);
|
|
|
|
/**
|
|
* Joins an array of strings into a single string.
|
|
*
|
|
* Note: The joiner is placed between each source string. The joiner is not
|
|
* placed after the last source string. If there is only one source string,
|
|
* then the result string will be equal to the source string.
|
|
*
|
|
* Note: any content that is present in dest will be overwritten with the new
|
|
* joined string.
|
|
*
|
|
* Note: If the destination array is not large enough to hold the entire
|
|
* contents of the joined string, then the joined string will be truncated
|
|
* to fit in the destination array.
|
|
*
|
|
* @param[out] dest The destination array to write the joined string into.
|
|
* @param[in] dest_len The max number of characters to write to the destination
|
|
* array including the terminating null. Must be > 0
|
|
* @param[in] joiner The string to concatenate between each source string.
|
|
* The joiner string may be NULL which is processed as a zero length string.
|
|
* @param[in] src An array of strings to join. The array must be non-null.
|
|
* Array items may be NULL and are processed as zero length strings.
|
|
* @param[in] src_len The number of strings to join in the src array. Must be > 0
|
|
* @return A pointer to the beginning of the joined string (ie. returns dest).
|
|
*/
|
|
char *
|
|
g_strnjoin(char *dest, int dest_len, const char *joiner, const char *src[], int src_len);
|
|
|
|
/**
|
|
* Converts a binary array into a hux dump suitable for displaying to a user.
|
|
*
|
|
* The format of the hex dump is:
|
|
* 0000 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 ................
|
|
* /\ /\ /\
|
|
* | | |
|
|
* | | ascii representation of bytes
|
|
* | hex representation of bytes
|
|
* offset from beginning of the byte array in hex
|
|
*
|
|
* Note: the ascii representation uses '.' for all non-printable
|
|
* characters (eg. below 32 or above 127).
|
|
*
|
|
* Note: the string contains embedded new lines, but is not new line terminated.
|
|
*
|
|
* @param[in] src Value to convert
|
|
* @param[in] len The number of bytes in src to convert
|
|
* @return string containing the hex dump that must be free'd by the caller
|
|
*/
|
|
char *
|
|
g_bytes_to_hexdump(const char *src, int len);
|
|
|
|
/**
|
|
* Extracts the display number from an X11 display string
|
|
*
|
|
* @param Display string (i.e. g_getenv("DISPLAY"))
|
|
*
|
|
* @result <0 if the string could not be parsed, or >=0 for a display number
|
|
*/
|
|
int
|
|
g_get_display_num_from_display(const char *display_text);
|
|
|
|
/**
|
|
* Converts a bitmask into a string for output purposes
|
|
*
|
|
* Similar to g_bitmask_to_charstr(), but tokens are strings, separated
|
|
* by delimiters.
|
|
*
|
|
* @param bitmask Bitmask to convert
|
|
* @param bitdefs Definitions for strings for bits
|
|
* @param delim Delimiter to use between strings
|
|
* @param buff Output buff
|
|
* @param bufflen Length of buff, including terminator '`\0'
|
|
*
|
|
* @return Total length excluding terminator which would be written, as
|
|
* in snprintf(). Can be used to check for overflow
|
|
*
|
|
* @note Any undefined bits in the bitmask are appended to the output as
|
|
* a hexadecimal constant.
|
|
*/
|
|
int
|
|
g_bitmask_to_str(int bitmask, const struct bitmask_string bitdefs[],
|
|
char delim, char *buff, int bufflen);
|
|
|
|
/***
|
|
* Converts a string containing a series of tokens to a bitmask.
|
|
*
|
|
* Similar to g_charstr_to_bitmask(), but tokens are strings, separated
|
|
* by delimiters.
|
|
*
|
|
* @param str Input string
|
|
* @param bitdefs Array mapping tokens to bitmask values
|
|
* @param delim Delimiter for tokens in str
|
|
* @param[out] unrecognised Buffer for any unrecognised tokens
|
|
* @param unrecognised_len Length of unrecognised including '\0';
|
|
* @return bitmask value for recognised tokens
|
|
*/
|
|
int
|
|
g_str_to_bitmask(const char *str, const struct bitmask_string bitdefs[],
|
|
const char *delim, char *unrecognised,
|
|
int unrecognised_len);
|
|
|
|
/**
|
|
* Converts a bitmask into a string for output purposes
|
|
*
|
|
* Similar to g_bitmask_to_str(), but tokens are individual characters, and
|
|
* there are no delimiters.
|
|
*
|
|
* @param bitmask Bitmask to convert
|
|
* @param bitdefs Definitions for strings for bits
|
|
* @param buff Output buff
|
|
* @param bufflen Length of buff, including terminator '`\0'
|
|
* @param[out] rest Any unused bits which weren't covered by bitdefs.
|
|
* May be NULL.
|
|
*
|
|
* @return Total length excluding terminator which would be written, as
|
|
* in snprintf(). Can be used to check for overflow
|
|
*
|
|
* @note Any undefined bits in the bitmask are appended to the output as
|
|
* a hexadecimal constant.
|
|
*/
|
|
int
|
|
g_bitmask_to_charstr(int bitmask, const struct bitmask_char bitdefs[],
|
|
char *buff, int bufflen, int *rest);
|
|
|
|
/***
|
|
* Converts a string containing a series of characters to a bitmask.
|
|
*
|
|
* Similar to g_str_to_bitmask(), but tokens are individual characters, and
|
|
* there are no delimiters.
|
|
*
|
|
* @param str Input string
|
|
* @param bitdefs Array mapping tokens to bitmask values
|
|
* @param delim Delimiter for tokens in str
|
|
* @param[out] unrecognised Buffer for any unrecognised tokens
|
|
* @param unrecognised_len Length of unrecognised including '\0';
|
|
* @return bitmask value for recognised tokens
|
|
*/
|
|
int
|
|
g_charstr_to_bitmask(const char *str, const struct bitmask_char bitdefs[],
|
|
char *unrecognised, int unrecognised_len);
|
|
|
|
int g_strlen(const char *text);
|
|
char *g_strchr(const char *text, int c);
|
|
char *g_strrchr(const char *text, int c);
|
|
char *g_strnchr(const char *text, int c, int len);
|
|
char *g_strcpy(char *dest, const char *src);
|
|
char *g_strncpy(char *dest, const char *src, int len);
|
|
char *g_strcat(char *dest, const char *src);
|
|
char *g_strncat(char *dest, const char *src, int len);
|
|
char *g_strdup(const char *in);
|
|
char *g_strndup(const char *in, const unsigned int maxlen);
|
|
int g_strcmp(const char *c1, const char *c2);
|
|
int g_strncmp(const char *c1, const char *c2, int len);
|
|
int g_strncmp_d(const char *c1, const char *c2, const char delim, int len);
|
|
int g_strcasecmp(const char *c1, const char *c2);
|
|
int g_strncasecmp(const char *c1, const char *c2, int len);
|
|
int g_atoi(const char *str);
|
|
|
|
/**
|
|
* Extends g_atoi(), Converts decimal and hexadecimal number String to integer
|
|
*
|
|
* Prefix hexadecimal numbers with '0x'
|
|
*
|
|
* @param str String to convert to an integer
|
|
* @return int Integer expression of a string
|
|
*/
|
|
int g_atoix(const char *str);
|
|
int g_htoi(char *str);
|
|
int g_bytes_to_hexstr(const void *bytes, int num_bytes, char *out_str,
|
|
int bytes_out_str);
|
|
int g_pos(const char *str, const char *to_find);
|
|
char *g_strstr(const char *haystack, const char *needle);
|
|
|
|
/** trim spaces and tabs, anything <= space
|
|
*
|
|
* @param str (assumed to be UTF-8)
|
|
* @param trim_flags 1 trim left, 2 trim right, 3 trim both, 4 trim through
|
|
* @return != 0 - trim_flags not recognised
|
|
* this will always shorten the string or not change it */
|
|
int g_strtrim(char *str, int trim_flags);
|
|
|
|
/**
|
|
* Maps a signal number to a string, i.e. SIGHUP -> "SIGHUP"
|
|
*
|
|
* @param signum Signal number
|
|
* @param sigstr buffer for result
|
|
* @return sigstr, for convenience
|
|
*
|
|
* Buffer is assumed to be at least MAXSTRSIGLEN
|
|
*
|
|
* The string "SIG#<num>" is returned for unrecognised signums
|
|
*/
|
|
char *g_sig2text(int signum, char sigstr[]);
|
|
|
|
/**
|
|
* Get the next Unicode character from a UTF-8 string
|
|
*
|
|
* @param utf8str_ref UTF 8 string [by reference]
|
|
* @param len_ref Length of string [by reference] or NULL
|
|
* @return Unicode character
|
|
*
|
|
* On return, utf8str and len are updated to point past the decoded character.
|
|
* Unrecognised characters are mapped to UCS_REPLACEMENT_CHARACTER
|
|
*
|
|
* len is not needed if your utf8str has a terminator, or is known to
|
|
* be well-formed.
|
|
*/
|
|
char32_t
|
|
utf8_get_next_char(const char **utf8str_ref, unsigned int *len_ref);
|
|
|
|
/**
|
|
* Convert a Unicode character to UTF-8
|
|
* @param c32 Unicode character
|
|
* @param u8str buffer containing at least MAXLEN_UTF8_CHAR bytes for result
|
|
* @return Number of bytes written to u8str. Can be NULL if only the
|
|
* length is needed.
|
|
*
|
|
* The bytes written to u8str are unterminated
|
|
*/
|
|
#define MAXLEN_UTF8_CHAR 4
|
|
unsigned int
|
|
utf_char32_to_utf8(char32_t c32, char *u8str);
|
|
|
|
/**
|
|
* Returns the number of Unicode characters in a UTF-8 string
|
|
* @param utf8str UTF-8 string
|
|
* @result Number of Unicode characters in the string (terminator not included)
|
|
*/
|
|
unsigned int
|
|
utf8_char_count(const char *utf8str);
|
|
|
|
/**
|
|
* Returns the number of UTF-16 words required to store a UTF-8 string
|
|
* @param utf8str UTF-8 string
|
|
* @param len Length of UTF-8 string
|
|
* @result number of words to store UTF-8 string as UTF-16.
|
|
*/
|
|
unsigned int
|
|
utf8_as_utf16_word_count(const char *utf8str, unsigned int len);
|
|
|
|
/**
|
|
* Add a Unicode character into a UTF-8 string
|
|
* @param utf8str Pointer to UTF-8 string
|
|
* @param len Length of buffer for UTF-8 string (includes NULL)
|
|
* @param c32 character to add
|
|
* @param index Where to add the codepoint
|
|
* @return 1 for success, 0 if no character was inserted
|
|
*
|
|
* This routine has to parse the string as it goes, so can be slow.
|
|
*/
|
|
int
|
|
utf8_add_char_at(char *utf8str, unsigned int len, char32_t c32,
|
|
unsigned int index);
|
|
|
|
/**
|
|
* Remove a Unicode character from a UTF-8 string
|
|
* @param utf8str Pointer to UTF-8 string
|
|
* @param index Where to remove the codepoint from (0-based)
|
|
* @return Character removed, or 0 if no character was removed
|
|
*
|
|
* This routine has to parse the string as it goes, so can be slow.
|
|
*/
|
|
char32_t
|
|
utf8_remove_char_at(char *utf8str, unsigned int index);
|
|
|
|
#endif
|