utf8转unicode的总结

工作中遇到 UTF-8 字符串转 Unicode 转义序列(\uXXXX)的需求,这里做一些总结和记录。

全部转


/**
 * Encode every character of a UTF-8 string as a `\uXXXX` escape.
 *
 * The input is converted to UTF-16 with an explicit big-endian byte order, so
 * the result does not depend on which byte order the platform's iconv picks
 * for the bare "UCS-2" name. Each 16-bit code unit becomes one zero-padded,
 * lower-case, four-digit escape; a character outside the BMP is therefore
 * written as a surrogate pair (two escapes) instead of making the call fail.
 *
 * NOTE: this document defines unicode_encode2() twice with the same body;
 * keep a single copy when pasting it into a real PHP file.
 *
 * @param string $name UTF-8 encoded text.
 * @return string The escaped text, or '' when the input is empty or iconv
 *                cannot convert it (e.g. it is not valid UTF-8).
 */
function unicode_encode2($name)
{
    $utf16 = iconv('UTF-8', 'UTF-16BE', $name);
    if ($utf16 === false || $utf16 === '')
    {
        return '';
    }

    // bin2hex() yields two lower-case hex digits per byte, so every
    // 4-character slice is exactly one big-endian UTF-16 code unit.
    return '\u'.implode('\u', str_split(bin2hex($utf16), 4));
}

只转中文,英文及逗号不转


/**
 * Encode only the non-ASCII characters of a UTF-8 string as `\uXXXX`.
 *
 * ASCII characters (letters, digits, commas, ...) are copied through
 * unchanged. Every other UTF-16 code unit is written as a zero-padded,
 * lower-case, four-digit escape. This includes U+0080..U+00FF (for example
 * the middle dot U+00B7): copying those through as one raw byte would not be
 * valid UTF-8.
 *
 * The conversion uses an explicit big-endian UTF-16 target so the result does
 * not depend on the byte order the platform's iconv picks for bare "UCS-2";
 * characters outside the BMP are written as a surrogate pair.
 *
 * @param string $name UTF-8 encoded text.
 * @return string The partially escaped text, or '' when the input is empty
 *                or iconv cannot convert it (e.g. it is not valid UTF-8).
 */
function unicode_encode($name)
{
    $utf16 = iconv('UTF-8', 'UTF-16BE', $name);
    if ($utf16 === false || $utf16 === '')
    {
        return '';
    }

    $str = '';
    foreach (str_split($utf16, 2) as $unit)
    {
        // Big-endian: first byte is the high byte of the code unit.
        $code = (ord($unit[0]) << 8) | ord($unit[1]);
        if ($code < 0x80)
        {
            // Plain ASCII stays readable.
            $str .= chr($code);
        }
        else
        {
            $str .= sprintf('\u%04x', $code);
        }
    }
    return $str;
}

由于部分特殊字符(例如 '·',U+00B7)的码点高位字节也为 0,仅凭高位字节无法把它们和英文区分开,而这类字符同样需要转义,因此这里改用全部转的版本


/**
 * Encode every character of a UTF-8 string as a `\uXXXX` escape, so that
 * special characters such as the middle dot (U+00B7) are escaped as well.
 *
 * The input is converted to UTF-16 with an explicit big-endian byte order, so
 * the result does not depend on which byte order the platform's iconv picks
 * for the bare "UCS-2" name. Each 16-bit code unit becomes one zero-padded,
 * lower-case, four-digit escape; a character outside the BMP is therefore
 * written as a surrogate pair (two escapes) instead of making the call fail.
 *
 * NOTE: this document defines unicode_encode2() twice with the same body;
 * keep a single copy when pasting it into a real PHP file.
 *
 * @param string $name UTF-8 encoded text.
 * @return string The escaped text, or '' when the input is empty or iconv
 *                cannot convert it (e.g. it is not valid UTF-8).
 */
function unicode_encode2($name)
{
    $utf16 = iconv('UTF-8', 'UTF-16BE', $name);
    if ($utf16 === false || $utf16 === '')
    {
        return '';
    }

    // bin2hex() yields two lower-case hex digits per byte, so every
    // 4-character slice is exactly one big-endian UTF-16 code unit.
    return '\u'.implode('\u', str_split(bin2hex($utf16), 4));
}
// 例如特殊字符 '·'(U+00B7)

解码


/**
 * Decode `\uXXXX` escapes in a string back to UTF-8 text.
 *
 * Only the escapes are rewritten; every other character (letters, spaces,
 * commas and any other punctuation) is kept exactly as it was. A run of
 * consecutive escapes is decoded in one go as big-endian UTF-16, so a
 * surrogate pair such as `\ud83d\ude00` becomes a single character, and the
 * result does not depend on the byte order the platform's iconv picks for
 * the bare "UCS-2" name.
 *
 * An escape must be a lower-case `\u` followed by exactly four hex digits
 * (either case). A run that iconv cannot decode (e.g. a lone surrogate) is
 * left in the output unchanged.
 *
 * @param string $name Text containing `\uXXXX` escapes.
 * @return string The text with all decodable escapes replaced by UTF-8.
 */
function unicode_decode($name)
{
    $decoded = preg_replace_callback(
        '/(?:\\\\u[0-9a-fA-F]{4})+/',
        function ($match)
        {
            // Strip the "\u" markers: what remains is the hex dump of the
            // big-endian UTF-16 code units, four digits per unit.
            $utf16 = hex2bin(str_replace('\u', '', $match[0]));
            $utf8 = iconv('UTF-16BE', 'UTF-8', $utf16);
            return $utf8 === false ? $match[0] : $utf8;
        },
        $name
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the
    // untouched input rather than losing it.
    return $decoded === null ? $name : $decoded;
}

标签: unicode, utf8