Modest/source/myencoding/encoding.c

/*
 Copyright (C) 2015-2017 Alexander Borisov

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

 Author: lex.borisov@gmail.com (Alexander Borisov)
*/

#include "myencoding/encoding.h"
#include "myencoding/resource.h"
#include "mycore/utils/resources.h"

myencoding_custom_f myencoding_get_function_by_id(myencoding_t idx)
{
    return myencoding_function_index[idx];
}

myencoding_status_t myencoding_decode_utf_8(unsigned const char data, myencoding_result_t *res)
{
    // res->first  -- lower boundary
    // res->second -- upper boundary
    // res->result -- code point
    // res->third  -- bytes seen
    // res->flag   -- bytes needed

    if(res->flag == 0)
    {
        if(data <= 0x7F) {
            res->result = data;
            return MyENCODING_STATUS_OK;
        }
        else if(data >= 0xC2 && data <= 0xDF) {
            res->flag = 1;
            res->result = data - 0xC0;
        }
        else if(data >= 0xE0 && data <= 0xEF) {
            if(data == 0xE0) {
                res->first = 0xA0;
            }
            else if(data == 0xED){
                res->second = 0x9F;
            }

            res->flag = 2;
            res->result = data - 0xE0;
        }
        else if(data >= 0xF0 && data <= 0xF4) {
            if(data == 0xF0) {
                res->first = 0x90;
            }
            else if(data == 0xF4){
                res->second = 0x8F;
            }

            res->flag = 2;
            res->result = data - 0xF0;
        }
        else
            return MyENCODING_STATUS_ERROR;

        res->result = res->result << (6 * res->flag);
        return MyENCODING_STATUS_CONTINUE;
    }

    if(data < res->first && data > res->second)
    {
        res->result = 0x00;
        res->flag   = 0x00;
        res->third  = 0x00;
        res->first  = 0x80;
        res->second = 0xBF;

        return MyENCODING_STATUS_DONE|MyENCODING_STATUS_ERROR;
    }

    res->first  = 0x80;
    res->second = 0xBF;

    res->third++;
    res->result += (unsigned long)(data - 0x80) << (6 * (res->flag - res->third));

    if(res->third != res->flag)
        return MyENCODING_STATUS_CONTINUE;

    res->flag  = 0x00;
    res->third = 0x00;

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_ibm866(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_ibm866[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_2(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_2[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_3(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_3[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_4(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_4[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_5(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_5[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_6(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_6[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_7(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_7[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_8(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_8[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_8_i(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_8[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_10(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_10[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_13(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_13[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_14(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_14[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_15(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_15[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_iso_8859_16(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_iso_8859_16[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_koi8_r(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_koi8_r[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_koi8_u(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_koi8_u[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_macintosh(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_macintosh[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_874(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_874[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1250(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1250[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1251(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1251[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1252(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1252[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1253(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1253[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1254(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1254[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1255(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1255[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1256(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1256[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1257(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1257[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_windows_1258(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_windows_1258[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_x_mac_cyrillic(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        (res->result = data);
    else
        (res->result = myencoding_map_x_mac_cyrillic[(data - 0x80)]);

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_gbk(unsigned const char data, myencoding_result_t *res)
{
    return myencoding_decode_gb18030(data, res);
}

unsigned long myencoding_index_gb18030_ranges_code_point(unsigned long pointer)
{
    // step 1
    if((pointer > 39419 && pointer < 189000) || pointer > 1237575)
        return 0;

    // step 2
    if(pointer == 7457)
        return 0xe7c7;

    // step 3
    unsigned long offset = sizeof(myencoding_map_gb18030_ranges) / (sizeof(unsigned long) * 2);

    unsigned long code_point_offset = 0;
    while (offset) {
        offset--;

        if(myencoding_map_gb18030_ranges[offset][0] == pointer ||
           myencoding_map_gb18030_ranges[offset][0] < pointer)
        {
            code_point_offset = myencoding_map_gb18030_ranges[offset][1];
            break;
        }
    }

    // step 4
    return (code_point_offset + pointer - offset);
}

myencoding_status_t myencoding_decode_gb18030(unsigned const char data, myencoding_result_t *res)
{
    if(res->third) {
        if(data >= 0x30 && data <= 0x39) {
            res->result = myencoding_index_gb18030_ranges_code_point((((res->first - 0x81) * 10 + res->second - 0x30) *
                                                                          126 + res->third - 0x81) * 10 + data - 0x30);
        }

        res->first  = 0;
        res->second = 0;
        res->third  = 0;

        if(res->result)
            return MyENCODING_STATUS_OK;

        return MyENCODING_STATUS_ERROR;
    }
    else if(res->second) {
        if(data >= 0x81 && data <= 0xFE) {
            res->third = data;
            return MyENCODING_STATUS_CONTINUE;
        }

        res->first = 0;
        res->second = 0;

        return MyENCODING_STATUS_ERROR;
    }
    else if(res->first) {
        if(data >= 0x30 && data <= 0x39) {
            res->second = data;
            return MyENCODING_STATUS_CONTINUE;
        }

        unsigned long lead = res->first, pointer = 0x00;
        res->first = 0x00;

        unsigned char offset;

        if(data < 0x7F)
            offset = 0x40;
        else
            offset = 0x41;

        if((data >= 0x40 && data <= 0x7E) ||
           (data >= 0x80 && data <= 0xFE))
        {
            pointer = (lead - 0x81) * 190 + (data - offset);
        }
        else {
            return MyENCODING_STATUS_ERROR;
        }

        res->result = myencoding_map_gb18030[pointer];
        return MyENCODING_STATUS_OK;
    }

    if(data <= 0x7F) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    if(data == 0x80) {
        res->result = 0x20ac;
        return MyENCODING_STATUS_OK;
    }

    if(data >= 0x81 && data <= 0xFE) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}

myencoding_status_t myencoding_decode_big5(unsigned const char data, myencoding_result_t *res)
{
    if(res->first)
    {
        unsigned long lead = res->first;
        unsigned long pointer = 0x00;
        unsigned long offset;

        res->first = 0x00;

        if(data < 0x7F)
            offset = 0x40;
        else
            offset = 0x62;

        if((data >= 0x40 && data <= 0x7E) ||
           (data >= 0xA1 && data <= 0xFE))
        {
            pointer = (lead - 0x81) * 157 + (data - offset);
        }

        switch (pointer) {
            case 1133:
                // U+00CA U+0304
                res->result     = 0x00ca;
                res->result_aux = 0x0304;
                return MyENCODING_STATUS_OK;
            case 1135:
                // U+00CA U+030C
                res->result     = 0x00ca;
                res->result_aux = 0x030c;
                return MyENCODING_STATUS_OK;
            case 1164:
                // U+00EA U+0304
                res->result     = 0x00ea;
                res->result_aux = 0x0304;
                return MyENCODING_STATUS_OK;
            case 1166:
                // U+00EA U+030C
                res->result     = 0x00ea;
                res->result_aux = 0x030c;
                return MyENCODING_STATUS_OK;
            default:
                break;
        }

        if(pointer == 0)
            return MyENCODING_STATUS_ERROR;

        res->result = myencoding_map_big5[pointer];
        res->result_aux = 0;

        return MyENCODING_STATUS_OK;
    }

    if(data <= 0x7F) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    if(data >= 0x81 && data <= 0xFE) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}

myencoding_status_t myencoding_decode_euc_jp(unsigned const char data, myencoding_result_t *res)
{
    if(res->first == 0x8E && (data >= 0xA1 && data <= 0xDF)) {
        res->first = 0x00;
        res->result = 0xFF61 + data - 0xA1;

        return MyENCODING_STATUS_OK;
    }
    else if(res->first == 0x8F && (data >= 0xA1 && data <= 0xFE)) {
        res->flag = 1;
        res->first = data;

        return MyENCODING_STATUS_CONTINUE;
    }
    else if(res->first)
    {
        unsigned long lead = res->first;
        res->first = 0x00;

        if((lead >= 0xA1 && lead <= 0xFE) &&
           (data >= 0xA1 && data <= 0xFE))
        {
            unsigned long idx = (lead - 0xA1) * 94 + data - 0xA1;
            if(res->flag) {
                res->result = myencoding_map_jis0212[idx];
            }
            else {
                res->result = myencoding_map_jis0208[idx];
            }

            res->flag = 0;

            if(res->result == 0)
                return MyENCODING_STATUS_ERROR;

            return MyENCODING_STATUS_OK;
        }
    }

    if(data <= 0x7F) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    if((data >= 0x8E && data <= 0x8F) ||
       (data >= 0xA1 && data <= 0xFE))
    {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}

myencoding_status_t myencoding_decode_iso_2022_jp(unsigned const char data, myencoding_result_t *res)
{
    // res->first  -- lead
    // res->second -- state
    // res->third  -- output state
    // res->flag   -- output flag

    switch (res->second) {
        case 0: // ASCII
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if((data <= 0x7F) &&
                    data != 0x0E && data != 0x0F && data != 0x1B)
            {
                res->flag   = 0;
                res->result = data;
                return MyENCODING_STATUS_OK;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        case 1: // Roman
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if(data == 0x5C) {
                res->flag   = 0;
                res->result = 0x00A5;
                return MyENCODING_STATUS_OK;
            }
            else if(data == 0x7E) {
                res->flag   = 0;
                res->result = 0x203E;
                return MyENCODING_STATUS_OK;
            }
            else if((data <= 0x7F) &&
                    data != 0x0E && data != 0x0F &&
                    data != 0x1B && data != 0x5C &&
                    data != 0x7E)
            {
                res->flag   = 0;
                res->result = data;
                return MyENCODING_STATUS_OK;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        case 3: // Katakana
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if(data >= 0x21 && data <= 0x5F) {
                res->flag   = 0;
                res->result = 0xFF61 + data - 0x21;

                return MyENCODING_STATUS_OK;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        case 4: // Lead byte
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if(data >= 0x21 && data <= 0x7E) {
                res->flag   = 0;
                res->first  = data;
                res->second = 5;

                return MyENCODING_STATUS_CONTINUE;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        case 5: // Trail byte
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE|MyENCODING_STATUS_ERROR;
            }
            else if(data >= 0x21 && data <= 0x7E) {
                res->second = 4;

                unsigned long pointer = (res->first - 0x21) * 94 + data - 0x21;
                res->result = myencoding_map_jis0208[pointer];

                if(res->result == 0)
                    return MyENCODING_STATUS_ERROR;

                return MyENCODING_STATUS_OK;
            }

            res->second = 4;
            return MyENCODING_STATUS_ERROR;
        }

        case 6: // Escape start
        {
            if(data == 0x24 || data == 0x28) {
                res->first = data;
                res->second = 7;

                return MyENCODING_STATUS_CONTINUE;
            }

            res->flag   = 0;
            res->second = res->third;

            return MyENCODING_STATUS_ERROR;
        }

        case 7: // Escape
        {
            unsigned long lead = res->first;
            res->first = 0x00;

            res->second = 0x00;

            if(lead == 0x28 && data == 0x42) {
                res->second = 0;
            }
            else if(lead == 0x28 && data == 0x4A) {
                res->second = 1;
            }
            else if(lead == 0x28 && data == 0x49) {
                res->second = 2;
            }
            else if(lead == 0x24 && (data == 0x40 || data == 0x42)) {
                res->second = 4;
            }

            if(res->second)
            {
                res->third = res->second;

                unsigned long output_flag = res->flag;
                res->flag = 1;

                if(output_flag)
                    return MyENCODING_STATUS_ERROR;

                return MyENCODING_STATUS_CONTINUE;
            }

            res->flag = 0;
            res->second = res->third;

            return MyENCODING_STATUS_ERROR;
        }

        default:
            break;
    }

    return 0;
}

myencoding_status_t myencoding_decode_shift_jis(unsigned const char data, myencoding_result_t *res)
{
    // res->first -- lead

    if(res->first)
    {
        unsigned long offset;
        unsigned long lead_offset;

        if(data < 0x7F)
            offset = 0x40;
        else
            offset = 0x41;

        if(res->first < 0xA0)
            lead_offset = 0x81;
        else
            lead_offset = 0xC1;

        unsigned long pointer = 0x00;
        if((data >= 0x40 && data <= 0x7E) ||
           (data >= 0x80 && data <= 0xFC))
        {
            pointer = (res->first - lead_offset) * 188 + data - offset;
        }

        if(pointer)
            res->result = myencoding_map_jis0208[pointer];
        else
            res->result = 0x00;

        if(res->result == 0x00 && (pointer >= 8836 && pointer <= 10528)) {
            res->result = 0xE000 + pointer - 8836;
        }

        if(res->result)
            return MyENCODING_STATUS_OK;

        return MyENCODING_STATUS_ERROR;
    }

    if((data <= 0x7F) || data == 0x80) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    if(data >= 0xA1 && data <= 0xDF) {
        res->result = 0xFF61 + data - 0xA1;
        return MyENCODING_STATUS_OK;
    }

    if((data >= 0x81 && data <= 0x9F) ||
       (data >= 0xE0 && data <= 0xFC)) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}

myencoding_status_t myencoding_decode_euc_kr(unsigned const char data, myencoding_result_t *res)
{
    // res->first -- lead

    if(res->first)
    {
        unsigned long lead = res->first, pointer = 0x00;
        res->first = 0x00;

        if(data >= 0x41 && data <= 0xFE) {
            pointer = (lead - 0x81) * 190 + (data - 0x41);
        }

        if(pointer)
            res->result = myencoding_map_euc_kr[pointer];

        if(res->result)
            return MyENCODING_STATUS_OK;

        return MyENCODING_STATUS_ERROR;
    }

    if(data <= 0x7F) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    if(data >= 0x81 && data <= 0xFE) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}

myencoding_status_t myencoding_decode_shared_utf_16(unsigned const char data, myencoding_result_t *res)
{
    // res->first  -- lead
    // res->second -- lead surrogate
    // res->flag   -- flag

    if(res->first == 0x00) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    unsigned long code_unit;
    if(res->flag)
        (code_unit = (res->first << 8) + data);
    else
        (code_unit = (unsigned long)(data << 8) + res->first);

    res->first = 0x00;

    if(res->second) {
        unsigned long lead_surrogate = res->second;
        res->second = 0x00;

        if(code_unit >= 0xDC00 && code_unit <= 0xDFFF) {
            res->result = 0x10000 + ((lead_surrogate - 0xD800) << 10) + (code_unit - 0xDC00);
            return MyENCODING_STATUS_OK;
        }

        unsigned char byte1 = (unsigned char)(code_unit >> 8);
        unsigned char byte2 = (unsigned char)(code_unit & 0x00FF);

        if(res->flag) {
            res->result     = byte1;
            res->result_aux = byte2;
        }
        else {
            res->result     = byte2;
            res->result_aux = byte1;
        }

        return MyENCODING_STATUS_DONE|MyENCODING_STATUS_ERROR;
    }

    if(code_unit >= 0xD800 && code_unit <= 0xDBFF) {
        res->second = code_unit;
        return MyENCODING_STATUS_CONTINUE;
    }

    if(code_unit >= 0xDC00 && code_unit <= 0xDFFF) {
        return MyENCODING_STATUS_ERROR;
    }

    res->result = code_unit;

    return MyENCODING_STATUS_OK;
}

myencoding_status_t myencoding_decode_utf_16be(unsigned const char data, myencoding_result_t *res)
{
    if(res->flag == 0)
        res->flag = 1;

    return myencoding_decode_shared_utf_16(data, res);
}

myencoding_status_t myencoding_decode_utf_16le(unsigned const char data, myencoding_result_t *res)
{
    if(res->flag)
        res->flag = 0;

    return myencoding_decode_shared_utf_16(data, res);
}

myencoding_status_t myencoding_decode_x_user_defined(unsigned const char data, myencoding_result_t *res)
{
    if(data <= 0x7F)
        res->result = data;
    else
        res->result = 0xF780 + data - 0x80;

    return MyENCODING_STATUS_OK;
}

void myencoding_result_clean(myencoding_result_t *res)
{
    memset(res, 0, sizeof(myencoding_result_t));
}

size_t myencoding_codepoint_ascii_length(size_t codepoint)
{
    if (codepoint <= 0x0000007F) {
        return 1;
    }
    else if (codepoint <= 0x000007FF) {
        return 2;
    }
    else if (codepoint <= 0x0000FFFF) {
        return 3;
    }
    else if (codepoint <= 0x001FFFFF) {
        return 4;
    }
    /* not uses in unicode */
    else if (codepoint <= 0x03FFFFFF) {
        return 5;
    }
    else if (codepoint <= 0x7FFFFFFF) {
        return 6;
    }

    return 0;
}

size_t myencoding_ascii_utf_8_length(const unsigned char data)
{
    if (data < 0x80){
        return 1;
    }
    else if ((data & 0xe0) == 0xc0) {
        return 2;
    }
    else if ((data & 0xf0) == 0xe0) {
        return 3;
    }
    else if ((data & 0xf8) == 0xf0) {
        return 4;
    }
    else if ((data & 0xfc) == 0xf8) {
        return 5;
    }
    else if ((data & 0xff) == 0xfc) {
        return 6;
    }

    return 0;
}

size_t myencoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data)
{
    /* 0x80 -- 10xxxxxx */
    /* 0xC0 -- 110xxxxx */
    /* 0xE0 -- 1110xxxx */
    /* 0xF0 -- 11110xxx */
    /* 0xF8 -- 111110xx */
    /* 0xFC -- 1111110x */

    if (codepoint <= 0x0000007F) {
        /* 0xxxxxxx */
        data[0] = (char)codepoint;
        return 1;
    }
    else if (codepoint <= 0x000007FF) {
        /* 110xxxxx 10xxxxxx */
        data[0] = (char)(0xC0 | (codepoint >> 6  ));
        data[1] = (char)(0x80 | (codepoint & 0x3F));

        return 2;
    }
    else if (codepoint <= 0x0000FFFF) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        data[0] = (char)(0xE0 | ((codepoint >> 12)));
        data[1] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F));
        data[2] = (char)(0x80 | ( codepoint & 0x3F));

        return 3;
    }
    else if (codepoint <= 0x001FFFFF) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        data[0] = (char)(0xF0 | ( codepoint >> 18));
        data[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
        data[2] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F));
        data[3] = (char)(0x80 | ( codepoint & 0x3F));

        return 4;
    }
    /* not uses in unicode */
    //    else if (codepoint <= 0x03FFFFFF) {
    //        /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
    //        data[0] = 0xF8 | ( codepoint >> 24);
    //        data[1] = 0x80 | ((codepoint >> 18) & 0x3F);
    //        data[2] = 0x80 | ((codepoint >> 12) & 0x3F);
    //        data[3] = 0x80 | ((codepoint >> 6 ) & 0x3F);
    //        data[4] = 0x80 | ( codepoint & 0x3F);
    //
    //        return 5;
    //    }
    //    else if (codepoint <= 0x7FFFFFFF) {
    //        /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
    //        data[0] = 0xFC | ( codepoint >> 30);
    //        data[1] = 0x80 | ((codepoint >> 24) & 0x3F);
    //        data[2] = 0x80 | ((codepoint >> 18) & 0x3F);
    //        data[3] = 0x80 | ((codepoint >> 12) & 0x3F);
    //        data[4] = 0x80 | ((codepoint >> 6 ) & 0x3F);
    //        data[5] = 0x80 | ( codepoint & 0x3F);
    //
    //        return 6;
    //    }

    return 0;
}

size_t myencoding_codepoint_to_lowercase_ascii_utf_8(size_t codepoint, char *data)
{
    /* 0x80 -- 10xxxxxx */
    /* 0xC0 -- 110xxxxx */
    /* 0xE0 -- 1110xxxx */
    /* 0xF0 -- 11110xxx */
    /* 0xF8 -- 111110xx */
    /* 0xFC -- 1111110x */

    if (codepoint <= 0x0000007F) {
        /* 0xxxxxxx */
        data[0] = (char)mycore_string_chars_lowercase_map[ codepoint ];
        return 1;
    }
    else if (codepoint <= 0x000007FF) {
        /* 110xxxxx 10xxxxxx */
        data[0] = (char)(0xC0 | (codepoint >> 6  ));
        data[1] = (char)(0x80 | (codepoint & 0x3F));

        return 2;
    }
    else if (codepoint <= 0x0000FFFF) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        data[0] = (char)(0xE0 | ((codepoint >> 12)));
        data[1] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F));
        data[2] = (char)(0x80 | ( codepoint & 0x3F));

        return 3;
    }
    else if (codepoint <= 0x001FFFFF) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        data[0] = (char)(0xF0 | ( codepoint >> 18));
        data[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
        data[2] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F));
        data[3] = (char)(0x80 | ( codepoint & 0x3F));

        return 4;
    }
    /* not uses in unicode */
    //    else if (codepoint <= 0x03FFFFFF) {
    //        /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
    //        data[0] = 0xF8 | ( codepoint >> 24);
    //        data[1] = 0x80 | ((codepoint >> 18) & 0x3F);
    //        data[2] = 0x80 | ((codepoint >> 12) & 0x3F);
    //        data[3] = 0x80 | ((codepoint >> 6 ) & 0x3F);
    //        data[4] = 0x80 | ( codepoint & 0x3F);
    //
    //        return 5;
    //    }
    //    else if (codepoint <= 0x7FFFFFFF) {
    //        /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
    //        data[0] = 0xFC | ( codepoint >> 30);
    //        data[1] = 0x80 | ((codepoint >> 24) & 0x3F);
    //        data[2] = 0x80 | ((codepoint >> 18) & 0x3F);
    //        data[3] = 0x80 | ((codepoint >> 12) & 0x3F);
    //        data[4] = 0x80 | ((codepoint >> 6 ) & 0x3F);
    //        data[5] = 0x80 | ( codepoint & 0x3F);
    //
    //        return 6;
    //    }

    return 0;
}

size_t myencoding_ascii_utf_8_to_codepoint(const unsigned char* data, size_t* codepoint)
{
    if (*data < 0x80){
        /* 0xxxxxxx */
        *codepoint = (size_t)*data;
        return 1;
    }
    else if ((*data & 0xe0) == 0xc0) {
        /* 110xxxxx 10xxxxxx */
        *codepoint  = (data[0] ^ (0xC0 & data[0])) << 6;
        *codepoint |= (data[1] ^ (0x80 & data[1]));

        return 2;
    }
    else if ((*data & 0xf0) == 0xe0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        *codepoint  = (data[0] ^ (0xE0 & data[0])) << 12;
        *codepoint |= (data[1] ^ (0x80 & data[1])) << 6;
        *codepoint |= (data[2] ^ (0x80 & data[2]));

        return 3;
    }
    else if ((*data & 0xf8) == 0xf0) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        *codepoint  = (data[0] ^ (0xF0 & data[0])) << 18;
        *codepoint |= (data[1] ^ (0x80 & data[1])) << 12;
        *codepoint |= (data[2] ^ (0x80 & data[2])) << 6;
        *codepoint |= (data[3] ^ (0x80 & data[3]));

        return 4;
    }

    return 0;
}

size_t myencoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data)
{
    if((codepoint >> 16)) {
        codepoint -= 0x10000;

        size_t high = 0xD800 | (codepoint >> 10);
        size_t low = 0xDC00 | (codepoint & 0x3FF);

        data[0] = (char)(high >> 8);
        data[1] = (char)high;
        data[2] = (char)(low >> 8);
        data[3] = (char)low;

        return 4;
    }

    data[0] = (char)(codepoint >> 8);
    data[1] = (char)codepoint;

    return 2;
}

size_t myencoding_convert_to_ascii_utf_8(mycore_string_raw_t* raw_str, const char* buff, size_t length, myencoding_t encoding)
{
    if(raw_str->data == NULL) {
        raw_str->size   = length + 1;
        raw_str->length = 0;
        raw_str->data   = mycore_malloc(sizeof(char) * raw_str->size);

        if(raw_str->data == NULL)
            return 0;
    }

    myencoding_result_t res = {0};

    unsigned const char* u_buff = (unsigned const char*)buff;
    const myencoding_custom_f func = myencoding_get_function_by_id(encoding);

    size_t i;
    for (i = 0; i < length; i++)
    {
        if(func(u_buff[i], &res) == MyENCODING_STATUS_OK) {
            if((raw_str->length + 6) >= raw_str->size) {
                size_t new_size = raw_str->length + 6 + (length / 2);
                char *new_data  = mycore_realloc(raw_str->data, sizeof(char) * new_size);

                if(new_data == NULL) {
                    return 0;
                }

                raw_str->data = new_data;
                raw_str->size = new_size;
            }

            raw_str->length += myencoding_codepoint_to_ascii_utf_8(res.result, &raw_str->data[raw_str->length]);
        }
    }

    return i;
}