`
hudeyong926
  • 浏览: 2037289 次
  • 来自: 武汉
社区版块
存档分类
最新评论

php汉字转换成拼音

阅读更多
<?php
/**
 * Converts UTF-8 Chinese text to pinyin.
 *
 * The input is transcoded to GBK and each double-byte character is looked up
 * in a table of pinyin range boundaries. Only characters whose code falls in
 * the table's range (-20319 .. -10247 after subtracting 65536) are
 * recognised; any other double-byte character yields an empty string.
 * Polyphonic characters are not disambiguated: each character maps to
 * exactly one syllable.
 *
 * @author Jerryli(hzjerry@gmail.com)
 * @version V0.20140715
 * @package SPFW.core.lib.final
 * @global SEA_PHP_FW_VAR_ENV
 * @example
 *    echo CUtf8_PY::encode('阿里巴巴科技有限公司'); // initials: albbkjyxgs
 *    echo CUtf8_PY::encode('阿里巴巴科技有限公司', 'all'); // full pinyin, space separated
 */
class CUtf8_PY
{
    /**
     * Pinyin lookup table: syllable => lowest character code of that syllable,
     * where the code is (lead byte << 8) + trail byte - 65536.
     *
     * The entries MUST stay in ascending order of their value, because
     * zh2py() scans them sequentially and stops at the first boundary that
     * exceeds the code being looked up.
     *
     * @var array
     */
    private static $_aMaps = array(
        'a' => -20319, 'ai' => -20317, 'an' => -20304, 'ang' => -20295, 'ao' => -20292,
        'ba' => -20283, 'bai' => -20265, 'ban' => -20257, 'bang' => -20242, 'bao' => -20230, 'bei' => -20051, 'ben' => -20036, 'beng' => -20032, 'bi' => -20026, 'bian' => -20002, 'biao' => -19990, 'bie' => -19986, 'bin' => -19982, 'bing' => -19976, 'bo' => -19805, 'bu' => -19784,
        'ca' => -19775, 'cai' => -19774, 'can' => -19763, 'cang' => -19756, 'cao' => -19751, 'ce' => -19746, 'ceng' => -19741, 'cha' => -19739, 'chai' => -19728, 'chan' => -19725, 'chang' => -19715, 'chao' => -19540, 'che' => -19531, 'chen' => -19525, 'cheng' => -19515, 'chi' => -19500, 'chong' => -19484, 'chou' => -19479, 'chu' => -19467, 'chuai' => -19289, 'chuan' => -19288, 'chuang' => -19281, 'chui' => -19275, 'chun' => -19270, 'chuo' => -19263, 'ci' => -19261, 'cong' => -19249, 'cou' => -19243, 'cu' => -19242, 'cuan' => -19238, 'cui' => -19235, 'cun' => -19227, 'cuo' => -19224,
        'da' => -19218, 'dai' => -19212, 'dan' => -19038, 'dang' => -19023, 'dao' => -19018, 'de' => -19006, 'deng' => -19003, 'di' => -18996, 'dian' => -18977, 'diao' => -18961, 'die' => -18952, 'ding' => -18783, 'diu' => -18774, 'dong' => -18773, 'dou' => -18763, 'du' => -18756, 'duan' => -18741, 'dui' => -18735, 'dun' => -18731, 'duo' => -18722,
        'e' => -18710, 'en' => -18697, 'er' => -18696,
        'fa' => -18526, 'fan' => -18518, 'fang' => -18501, 'fei' => -18490, 'fen' => -18478, 'feng' => -18463, 'fo' => -18448, 'fou' => -18447, 'fu' => -18446,
        'ga' => -18239, 'gai' => -18237, 'gan' => -18231, 'gang' => -18220, 'gao' => -18211, 'ge' => -18201, 'gei' => -18184, 'gen' => -18183, 'geng' => -18181, 'gong' => -18012, 'gou' => -17997, 'gu' => -17988, 'gua' => -17970, 'guai' => -17964, 'guan' => -17961, 'guang' => -17950, 'gui' => -17947, 'gun' => -17931, 'guo' => -17928,
        'ha' => -17922, 'hai' => -17759, 'han' => -17752, 'hang' => -17733, 'hao' => -17730, 'he' => -17721, 'hei' => -17703, 'hen' => -17701, 'heng' => -17697, 'hong' => -17692, 'hou' => -17683, 'hu' => -17676, 'hua' => -17496, 'huai' => -17487, 'huan' => -17482, 'huang' => -17468, 'hui' => -17454, 'hun' => -17433, 'huo' => -17427,
        'ji' => -17417, 'jia' => -17202, 'jian' => -17185, 'jiang' => -16983, 'jiao' => -16970, 'jie' => -16942, 'jin' => -16915, 'jing' => -16733, 'jiong' => -16708, 'jiu' => -16706, 'ju' => -16689, 'juan' => -16664, 'jue' => -16657, 'jun' => -16647,
        'ka' => -16474, 'kai' => -16470, 'kan' => -16465, 'kang' => -16459, 'kao' => -16452, 'ke' => -16448, 'ken' => -16433, 'keng' => -16429, 'kong' => -16427, 'kou' => -16423, 'ku' => -16419, 'kua' => -16412, 'kuai' => -16407, 'kuan' => -16403, 'kuang' => -16401, 'kui' => -16393, 'kun' => -16220, 'kuo' => -16216,
        'la' => -16212, 'lai' => -16205, 'lan' => -16202, 'lang' => -16187, 'lao' => -16180, 'le' => -16171, 'lei' => -16169, 'leng' => -16158, 'li' => -16155, 'lia' => -15959, 'lian' => -15958, 'liang' => -15944, 'liao' => -15933, 'lie' => -15920, 'lin' => -15915, 'ling' => -15903, 'liu' => -15889, 'long' => -15878, 'lou' => -15707, 'lu' => -15701, 'lv' => -15681, 'luan' => -15667, 'lue' => -15661, 'lun' => -15659, 'luo' => -15652,
        'ma' => -15640, 'mai' => -15631, 'man' => -15625, 'mang' => -15454, 'mao' => -15448, 'me' => -15436, 'mei' => -15435, 'men' => -15419, 'meng' => -15416, 'mi' => -15408, 'mian' => -15394, 'miao' => -15385, 'mie' => -15377, 'min' => -15375, 'ming' => -15369, 'miu' => -15363, 'mo' => -15362, 'mou' => -15183, 'mu' => -15180,
        'na' => -15165, 'nai' => -15158, 'nan' => -15153, 'nang' => -15150, 'nao' => -15149, 'ne' => -15144, 'nei' => -15143, 'nen' => -15141, 'neng' => -15140, 'ni' => -15139, 'nian' => -15128, 'niang' => -15121, 'niao' => -15119, 'nie' => -15117, 'nin' => -15110, 'ning' => -15109, 'niu' => -14941, 'nong' => -14937, 'nu' => -14933, 'nv' => -14930, 'nuan' => -14929, 'nue' => -14928, 'nuo' => -14926,
        'o' => -14922, 'ou' => -14921,
        'pa' => -14914, 'pai' => -14908, 'pan' => -14902, 'pang' => -14894, 'pao' => -14889, 'pei' => -14882, 'pen' => -14873, 'peng' => -14871, 'pi' => -14857, 'pian' => -14678, 'piao' => -14674, 'pie' => -14670, 'pin' => -14668, 'ping' => -14663, 'po' => -14654, 'pu' => -14645,
        'qi' => -14630, 'qia' => -14594, 'qian' => -14429, 'qiang' => -14407, 'qiao' => -14399, 'qie' => -14384, 'qin' => -14379, 'qing' => -14368, 'qiong' => -14355, 'qiu' => -14353, 'qu' => -14345, 'quan' => -14170, 'que' => -14159, 'qun' => -14151,
        'ran' => -14149, 'rang' => -14145, 'rao' => -14140, 're' => -14137, 'ren' => -14135, 'reng' => -14125, 'ri' => -14123, 'rong' => -14122, 'rou' => -14112, 'ru' => -14109, 'ruan' => -14099, 'rui' => -14097, 'run' => -14094, 'ruo' => -14092,
        'sa' => -14090, 'sai' => -14087, 'san' => -14083, 'sang' => -13917, 'sao' => -13914, 'se' => -13910, 'sen' => -13907, 'seng' => -13906, 'sha' => -13905, 'shai' => -13896, 'shan' => -13894, 'shang' => -13878, 'shao' => -13870, 'she' => -13859, 'shen' => -13847, 'sheng' => -13831, 'shi' => -13658, 'shou' => -13611, 'shu' => -13601, 'shua' => -13406, 'shuai' => -13404, 'shuan' => -13400, 'shuang' => -13398, 'shui' => -13395, 'shun' => -13391, 'shuo' => -13387, 'si' => -13383, 'song' => -13367, 'sou' => -13359, 'su' => -13356, 'suan' => -13343, 'sui' => -13340, 'sun' => -13329, 'suo' => -13326,
        'ta' => -13318, 'tai' => -13147, 'tan' => -13138, 'tang' => -13120, 'tao' => -13107, 'te' => -13096, 'teng' => -13095, 'ti' => -13091, 'tian' => -13076, 'tiao' => -13068, 'tie' => -13063, 'ting' => -13060, 'tong' => -12888, 'tou' => -12875, 'tu' => -12871, 'tuan' => -12860, 'tui' => -12858, 'tun' => -12852, 'tuo' => -12849,
        'wa' => -12838, 'wai' => -12831, 'wan' => -12829, 'wang' => -12812, 'wei' => -12802, 'wen' => -12607, 'weng' => -12597, 'wo' => -12594, 'wu' => -12585,
        'xi' => -12556, 'xia' => -12359, 'xian' => -12346, 'xiang' => -12320, 'xiao' => -12300, 'xie' => -12120, 'xin' => -12099, 'xing' => -12089, 'xiong' => -12074, 'xiu' => -12067, 'xu' => -12058, 'xuan' => -12039, 'xue' => -11867, 'xun' => -11861,
        'ya' => -11847, 'yan' => -11831, 'yang' => -11798, 'yao' => -11781, 'ye' => -11604, 'yi' => -11589, 'yin' => -11536, 'ying' => -11358, 'yo' => -11340, 'yong' => -11339, 'you' => -11324, 'yu' => -11303, 'yuan' => -11097, 'yue' => -11077, 'yun' => -11067,
        'za' => -11055, 'zai' => -11052, 'zan' => -11045, 'zang' => -11041, 'zao' => -11038, 'ze' => -11024, 'zei' => -11020, 'zen' => -11019, 'zeng' => -11018, 'zha' => -11014, 'zhai' => -10838, 'zhan' => -10832, 'zhang' => -10815, 'zhao' => -10800, 'zhe' => -10790, 'zhen' => -10780, 'zheng' => -10764, 'zhi' => -10587, 'zhong' => -10544, 'zhou' => -10533, 'zhu' => -10519, 'zhua' => -10331, 'zhuai' => -10329, 'zhuan' => -10328, 'zhuang' => -10322, 'zhui' => -10315, 'zhun' => -10309, 'zhuo' => -10307, 'zi' => -10296, 'zong' => -10281, 'zou' => -10274, 'zu' => -10270, 'zuan' => -10262, 'zui' => -10260, 'zun' => -10256, 'zuo' => -10254
    );

    /**
     * Encodes a UTF-8 string as pinyin.
     *
     * ASCII bytes are passed through unchanged. Double-byte characters that
     * the lookup table does not cover contribute an empty string (which, in
     * 'all' mode, still occupies a slot between separators).
     *
     * @param string $utf8Data   UTF-8 encoded input
     * @param string $sRetFormat Output format: 'head' returns the first letter
     *                           of each syllable concatenated without a
     *                           separator; any other value (conventionally
     *                           'all') returns full syllables joined by a
     *                           single space
     * @return string The pinyin text, or '' when the input cannot be
     *                converted to GBK
     */
    public static function encode($utf8Data, $sRetFormat = 'head')
    {
        $sGBK = iconv('UTF-8', 'GBK', $utf8Data);
        if (false === $sGBK) {
            // iconv() fails on malformed UTF-8 or characters GBK cannot
            // represent; there is nothing meaningful to transliterate.
            return '';
        }

        $bHeadOnly = ('head' === $sRetFormat);
        $aBuf = array();
        for ($i = 0, $iLoop = strlen($sGBK); $i < $iLoop; $i++) {
            // Square-bracket offsets: the curly-brace form was removed in PHP 8.
            $iChr = ord($sGBK[$i]);
            // GBK lead bytes start at 0x81. Treating only bytes above 0xA0 as
            // lead bytes would split GBK-extension characters into two bogus
            // single bytes and leak them into the output.
            if ($iChr > 128) {
                if ($i + 1 >= $iLoop) {
                    break; // dangling lead byte without a trail byte
                }
                // Combine both bytes and shift into the negative range used
                // by the lookup table.
                $iChr = ($iChr << 8) + ord($sGBK[++$i]) - 65536;
            }
            $sPinyin = self::zh2py($iChr);
            $aBuf[] = $bHeadOnly ? substr($sPinyin, 0, 1) : $sPinyin;
        }

        return implode($bHeadOnly ? '' : ' ', $aBuf);
    }

    /**
     * Maps one character code to its pinyin syllable.
     *
     * @param number $iWORD A single-byte code (positive), or a double-byte
     *                      GBK code minus 65536 (negative)
     * @return string The ASCII character itself for codes 1..127, the pinyin
     *                syllable for codes within the table's range, otherwise ''
     */
    private static function zh2py($iWORD)
    {
        if ($iWORD > 0 && $iWORD < 128) {
            // Plain ASCII passes through untouched.
            return chr($iWORD);
        }
        if ($iWORD < -20319 || $iWORD > -10247) {
            // Outside the range covered by the table.
            return '';
        }

        // The table is sorted ascending: the syllable is the last entry whose
        // boundary does not exceed the code.
        $result = '';
        foreach (self::$_aMaps as $py => $code) {
            if ($code > $iWORD) {
                break;
            }
            $result = $py;
        }
        return $result;
    }
}
?>

优化了传统的拼音转换处理类的算法,专门针对UTF-8字符集进行处理。如果你使用GBK或GB2312字符集,只需要去掉iconv函数,直接赋值,不需要转换就行。

https://github.com/overtrue/pinyin

//多音字
// 了
Pinyin::trans('了然'); // liǎo rán
Pinyin::trans('来了'); // lái le

 

分享到:
评论

相关推荐

    php中文转换成拼音api

    "php中文转换成拼音API"是解决这一需求的工具,它允许开发者将汉字转换为对应的汉语拼音。这个API可能包含了一个类库,如`ChinesePinyin.class.php`,用于处理中文到拼音的转换。 `ChinesePinyin.class.php` 文件很...

    中文汉字转换成完全拼音

    在IT领域,中文汉字转换成完全拼音是一项常见的需求,尤其在自然语言处理、搜索引擎优化、语音识别和输入法设计等方面有着广泛的应用。本项目提供的工具实现了这一功能,允许用户输入汉字,得到对应的英文全拼。此外...

    PHP汉字拼音转换

    Pinyin.class.php类文件可以将大多数汉字转换成汉语拼音,当然也有个别生僻字不能转换,如果你想转换所有的汉字拼音的话,可能需要再配合一个汉字字库来实现,使用该类文件就基本能满足你的项目需求了。用法: ...

    把汉字转换成拼音的PHP类

    在PHP编程语言中,处理汉字转换为拼音的需求是常见的,特别是在中文网站的SEO优化、关键词提取或者搜索引擎友好接口设计等方面。这个"把汉字转换成拼音的PHP类"提供了一个便捷的解决方案,允许开发者将汉字字符串...

    php汉字转拼音 将汉字转换为拼音,utf-8

    在PHP编程语言中,将汉字转换为...总的来说,PHP汉字转拼音是处理中文数据时不可或缺的技术,而UTF-8编码则保证了在不同系统和平台间数据的兼容性。通过选择合适的库并理解其工作原理,开发者可以高效地实现这一功能。

    汉字转成拼音 汉转拼.php

    汉转拼 php 汉字转成拼音 在线转换单个文件很方便...

    php 中文转换为拼音

    php实现转换 中文 为 拼音的全拼 或 拼音首字母,测试,无问题

    PHP 汉字转换拼音

    如果你不想使用外部库,也可以自己编写函数实现基础的汉字转拼音功能。这通常涉及查找汉字与拼音的对应关系,可以通过预处理生成的拼音字典或者使用Unicode分类来实现。例如,我们可以根据Unicode区间判断汉字属于...

    php汉字转换拼音程序

    php汉字转换拼音程序php汉字转换拼音程序php汉字转换拼音程序php汉字转换拼音程序php汉字转换拼音程序vvv

    php 汉字转换为拼音

    基于《最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)》扩展的。

    php汉字转拼音及获取拼音首字母.pdf

    PHP 汉字转拼音及获取拼音首字母 PHP 是一种服务器端脚本语言,广泛应用于 Web 开发领域。在处理汉字数据时,需要将汉字转换为拼音,以便于数据处理和检索。下面将详细介绍 PHP 中汉字转拼音及获取拼音首字母的...

    中文汉字转拼音

    在IT领域,中文汉字转拼音是一项重要的技术,特别是在处理中文数据、搜索引擎优化(SEO)、语音合成、自动文本处理等方面有着广泛的应用。PHP作为一款流行的服务器端脚本语言,提供了多种方法来实现这一功能。本文将...

    PHP高效汉字转拼音类

    // 简单的高效的汉字转拼音、拼音转汉字类 // 包含20441个汉字26821拼音转换库文件,字库基本是网上最全的了,当然万一发现少了可以自己加入到字库中,一行一条记录加入到文本文件中即可,非常简单 // 算法方面没有...

    PHP中文工具类支持汉字转拼音拼音分词简繁互转

    PHP 中文工具类,支持汉字转拼音、拼音分词、简繁互转。 PHP Chinese Tool class, support Chinese pinyin, pinyin participle, simplified and traditional conversion

    php 汉字转换成拼音

    该类是汉字转换成拼音类,本人已经测试过,应用的时候应该注意编码问题

    ThinkPHP中文转拼音字母简单调用

    首先,我们要理解的是,中文转拼音主要是通过特定的算法和库来完成的,这些库能够将汉字映射到对应的汉语拼音。在这个案例中,我们看到一个名为`String.class.php`的文件,这可能是一个自定义的字符串处理类,包含了...

    汉字转全拼音或拼音首字母,支持gb2312和utf8编码

    汉字转全拼音或拼音首字母,支持gb2312和utf8编码 ,附带函数说明, 汉字转换为全拼音或拼音首字母 使用方法: include(ROOT_PATH.'includes/cls_chart_pinyin.php'); echo Pinyin('中国人','utf-8',false); 函数...

    最好的汉字转换拼音utf-8 php版 权威

    标题中的"最好的汉字转换拼音utf-8 php版 权威"指的是一个专为PHP设计的库或工具,用于将汉字转换成对应的拼音,同时支持UTF-8编码,这是国际化的标准编码格式,可以处理多种语言,包括中文。这个工具对于那些需要...

    汉字转换拼音的PHP库.zip

     //单个汉字拼音的字典  private static $dic = array(原生的只有转换 是否返回首字母功能。扩展后支持1. 返回格式 [all:全拼音|first:首字母|one:仅第一字符首字母]2. 无法识别的字符替换用的占位符 (原版写...

    PHP汉字转拼音源代码

    "PHP汉字转拼音源代码"就是这样一个工具,它允许开发者通过调用特定的函数,将输入的汉字字符串转换成对应的拼音表示。 首先,我们要理解汉字转拼音的基本原理。这个过程通常涉及到汉字到拼音的映射,这需要一个...

Global site tag (gtag.js) - Google Analytics