肥宅自学平台_人人成为技术开发者

肥宅人只等待您,联系我们吧,曾经的巅峰是否能回来?我们正在用心服务!

PHP汉字转拼音代码(支持GB2312一级常用汉字,约3755个)

 更新时间:2024-04-15 15:19:49   作者:肥宅-季波   我要评论(0)  

         最近在一个项目中使用 PHP 作为向前端提供数据的开发语言。为了实现按字母排序,需要提取中文的汉语拼音,于是借这个项目写了一个汉字转拼音的脚本。脚本比较简单,注释也比较详细,这里就不多说了,直接上代码。

使用方法
  1. <?php
  2.     $py = new PinYin();
  3.     $all_py = $py->get_all_py("吴国伟"); // yields ['wu','guo','wei']; for a single string, join the array: join('',$all_py)
  4.     $first_py = $py->get_first_py($all_py);// yields wgw
  5.     $first_letter = $py->get_first_letter($all_py);// yields w
  6. ?>
PHP
类文件
  1. <?php
  /**
   * Converts Chinese text to Hanyu Pinyin with a GB2312 code-range lookup table.
   *
   * Usage:
   *     $py = new PinYin();
   *     $all_py = $py->get_all_py("吴国伟");             // ['wu', 'guo', 'wei']
   *     $joined = join('', $all_py);                     // 'wuguowei'
   *     $first_py = $py->get_first_py($all_py);          // 'wgw'
   *     $first_letter = $py->get_first_letter($all_py);  // 'w'
   *
   * Limitations visible from the table below:
   *   - Only two-byte codes between -20319 and -10247 (0xB0A1..0xD7F9 minus
   *     65536) are translated; every other multi-byte character yields ''.
   *   - Each code maps to exactly one syllable, so characters with several
   *     readings always get the same one.
   */
  class PinYin
  {
      /**
       * Syllable => first GB2312 code (as a signed 16-bit style value, i.e.
       * code - 65536) of the run of characters pronounced with that syllable.
       *
       * The entries MUST stay in ascending code order: zh_to_py() scans the
       * table front to back and stops at the first entry above the target.
       */
      private $dict_list = [
          'a' => -20319, 'ai' => -20317, 'an' => -20304, 'ang' => -20295, 'ao' => -20292,
          'ba' => -20283, 'bai' => -20265, 'ban' => -20257, 'bang' => -20242, 'bao' => -20230, 'bei' => -20051, 'ben' => -20036, 'beng' => -20032, 'bi' => -20026, 'bian' => -20002, 'biao' => -19990, 'bie' => -19986, 'bin' => -19982, 'bing' => -19976, 'bo' => -19805, 'bu' => -19784,
          'ca' => -19775, 'cai' => -19774, 'can' => -19763, 'cang' => -19756, 'cao' => -19751, 'ce' => -19746, 'ceng' => -19741, 'cha' => -19739, 'chai' => -19728, 'chan' => -19725, 'chang' => -19715, 'chao' => -19540, 'che' => -19531, 'chen' => -19525, 'cheng' => -19515, 'chi' => -19500, 'chong' => -19484, 'chou' => -19479, 'chu' => -19467, 'chuai' => -19289, 'chuan' => -19288, 'chuang' => -19281, 'chui' => -19275, 'chun' => -19270, 'chuo' => -19263, 'ci' => -19261, 'cong' => -19249, 'cou' => -19243, 'cu' => -19242, 'cuan' => -19238, 'cui' => -19235, 'cun' => -19227, 'cuo' => -19224,
          'da' => -19218, 'dai' => -19212, 'dan' => -19038, 'dang' => -19023, 'dao' => -19018, 'de' => -19006, 'deng' => -19003, 'di' => -18996, 'dian' => -18977, 'diao' => -18961, 'die' => -18952, 'ding' => -18783, 'diu' => -18774, 'dong' => -18773, 'dou' => -18763, 'du' => -18756, 'duan' => -18741, 'dui' => -18735, 'dun' => -18731, 'duo' => -18722,
          'e' => -18710, 'en' => -18697, 'er' => -18696,
          'fa' => -18526, 'fan' => -18518, 'fang' => -18501, 'fei' => -18490, 'fen' => -18478, 'feng' => -18463, 'fo' => -18448, 'fou' => -18447, 'fu' => -18446,
          'ga' => -18239, 'gai' => -18237, 'gan' => -18231, 'gang' => -18220, 'gao' => -18211, 'ge' => -18201, 'gei' => -18184, 'gen' => -18183, 'geng' => -18181, 'gong' => -18012, 'gou' => -17997, 'gu' => -17988, 'gua' => -17970, 'guai' => -17964, 'guan' => -17961, 'guang' => -17950, 'gui' => -17947,
          'gun' => -17931, 'guo' => -17928,
          'ha' => -17922, 'hai' => -17759, 'han' => -17752, 'hang' => -17733, 'hao' => -17730, 'he' => -17721, 'hei' => -17703, 'hen' => -17701, 'heng' => -17697, 'hong' => -17692, 'hou' => -17683, 'hu' => -17676, 'hua' => -17496, 'huai' => -17487, 'huan' => -17482, 'huang' => -17468, 'hui' => -17454,
          'hun' => -17433, 'huo' => -17427,
          'ji' => -17417, 'jia' => -17202, 'jian' => -17185, 'jiang' => -16983, 'jiao' => -16970, 'jie' => -16942, 'jin' => -16915, 'jing' => -16733, 'jiong' => -16708, 'jiu' => -16706, 'ju' => -16689, 'juan' => -16664, 'jue' => -16657, 'jun' => -16647,
          'ka' => -16474, 'kai' => -16470, 'kan' => -16465, 'kang' => -16459, 'kao' => -16452, 'ke' => -16448, 'ken' => -16433, 'keng' => -16429, 'kong' => -16427, 'kou' => -16423, 'ku' => -16419, 'kua' => -16412, 'kuai' => -16407, 'kuan' => -16403, 'kuang' => -16401, 'kui' => -16393, 'kun' => -16220, 'kuo' => -16216,
          'la' => -16212, 'lai' => -16205, 'lan' => -16202, 'lang' => -16187, 'lao' => -16180, 'le' => -16171, 'lei' => -16169, 'leng' => -16158, 'li' => -16155, 'lia' => -15959, 'lian' => -15958, 'liang' => -15944, 'liao' => -15933, 'lie' => -15920, 'lin' => -15915, 'ling' => -15903, 'liu' => -15889,
          'long' => -15878, 'lou' => -15707, 'lu' => -15701, 'lv' => -15681, 'luan' => -15667, 'lue' => -15661, 'lun' => -15659, 'luo' => -15652,
          'ma' => -15640, 'mai' => -15631, 'man' => -15625, 'mang' => -15454, 'mao' => -15448, 'me' => -15436, 'mei' => -15435, 'men' => -15419, 'meng' => -15416, 'mi' => -15408, 'mian' => -15394, 'miao' => -15385, 'mie' => -15377, 'min' => -15375, 'ming' => -15369, 'miu' => -15363, 'mo' => -15362, 'mou' => -15183, 'mu' => -15180,
          'na' => -15165, 'nai' => -15158, 'nan' => -15153, 'nang' => -15150, 'nao' => -15149, 'ne' => -15144, 'nei' => -15143, 'nen' => -15141, 'neng' => -15140, 'ni' => -15139, 'nian' => -15128, 'niang' => -15121, 'niao' => -15119, 'nie' => -15117, 'nin' => -15110, 'ning' => -15109, 'niu' => -14941,
          'nong' => -14937, 'nu' => -14933, 'nv' => -14930, 'nuan' => -14929, 'nue' => -14928, 'nuo' => -14926,
          'o' => -14922, 'ou' => -14921,
          'pa' => -14914, 'pai' => -14908, 'pan' => -14902, 'pang' => -14894, 'pao' => -14889, 'pei' => -14882, 'pen' => -14873, 'peng' => -14871, 'pi' => -14857, 'pian' => -14678, 'piao' => -14674, 'pie' => -14670, 'pin' => -14668, 'ping' => -14663, 'po' => -14654, 'pu' => -14645,
          'qi' => -14630, 'qia' => -14594, 'qian' => -14429, 'qiang' => -14407, 'qiao' => -14399, 'qie' => -14384, 'qin' => -14379, 'qing' => -14368, 'qiong' => -14355, 'qiu' => -14353, 'qu' => -14345, 'quan' => -14170, 'que' => -14159, 'qun' => -14151,
          'ran' => -14149, 'rang' => -14145, 'rao' => -14140, 're' => -14137, 'ren' => -14135, 'reng' => -14125, 'ri' => -14123, 'rong' => -14122, 'rou' => -14112, 'ru' => -14109, 'ruan' => -14099, 'rui' => -14097, 'run' => -14094, 'ruo' => -14092,
          'sa' => -14090, 'sai' => -14087, 'san' => -14083, 'sang' => -13917, 'sao' => -13914, 'se' => -13910, 'sen' => -13907, 'seng' => -13906, 'sha' => -13905, 'shai' => -13896, 'shan' => -13894, 'shang' => -13878, 'shao' => -13870, 'she' => -13859, 'shen' => -13847, 'sheng' => -13831, 'shi' => -13658, 'shou' => -13611, 'shu' => -13601, 'shua' => -13406, 'shuai' => -13404, 'shuan' => -13400, 'shuang' => -13398, 'shui' => -13395, 'shun' => -13391, 'shuo' => -13387, 'si' => -13383, 'song' => -13367, 'sou' => -13359, 'su' => -13356, 'suan' => -13343, 'sui' => -13340, 'sun' => -13329, 'suo' => -13326,
          'ta' => -13318, 'tai' => -13147, 'tan' => -13138, 'tang' => -13120, 'tao' => -13107, 'te' => -13096, 'teng' => -13095, 'ti' => -13091, 'tian' => -13076, 'tiao' => -13068, 'tie' => -13063, 'ting' => -13060, 'tong' => -12888, 'tou' => -12875, 'tu' => -12871, 'tuan' => -12860, 'tui' => -12858, 'tun' => -12852, 'tuo' => -12849,
          'wa' => -12838, 'wai' => -12831, 'wan' => -12829, 'wang' => -12812, 'wei' => -12802, 'wen' => -12607, 'weng' => -12597, 'wo' => -12594, 'wu' => -12585,
          'xi' => -12556, 'xia' => -12359, 'xian' => -12346, 'xiang' => -12320, 'xiao' => -12300, 'xie' => -12120, 'xin' => -12099, 'xing' => -12089, 'xiong' => -12074, 'xiu' => -12067, 'xu' => -12058, 'xuan' => -12039, 'xue' => -11867, 'xun' => -11861,
          'ya' => -11847, 'yan' => -11831, 'yang' => -11798, 'yao' => -11781, 'ye' => -11604, 'yi' => -11589, 'yin' => -11536, 'ying' => -11358, 'yo' => -11340, 'yong' => -11339, 'you' => -11324, 'yu' => -11303, 'yuan' => -11097, 'yue' => -11077, 'yun' => -11067,
          'za' => -11055, 'zai' => -11052, 'zan' => -11045, 'zang' => -11041, 'zao' => -11038, 'ze' => -11024, 'zei' => -11020, 'zen' => -11019, 'zeng' => -11018, 'zha' => -11014, 'zhai' => -10838, 'zhan' => -10832, 'zhang' => -10815, 'zhao' => -10800, 'zhe' => -10790, 'zhen' => -10780, 'zheng' => -10764, 'zhi' => -10587, 'zhong' => -10544, 'zhou' => -10533, 'zhu' => -10519, 'zhua' => -10331, 'zhuai' => -10329, 'zhuan' => -10328, 'zhuang' => -10322, 'zhui' => -10315, 'zhun' => -10309, 'zhuo' => -10307, 'zi' => -10296, 'zong' => -10281, 'zou' => -10274, 'zu' => -10270, 'zuan' => -10262,
          'zui' => -10260, 'zun' => -10256, 'zuo' => -10254,
      ];

      /**
       * Returns the pinyin of every character, e.g. '张三丰' => ['zhang', 'san', 'feng'].
       *
       * Single-byte characters are passed through unchanged and multi-byte
       * characters that the table does not cover produce an empty string, so
       * the result has one entry per input character.
       *
       * @param string $chinese Text to convert.
       * @param string $charset 'gb2312' (any letter case) when $chinese already
       *                        holds GB2312 bytes; any other value means the text
       *                        is UTF-8 and is converted first.
       * @return array List of syllables / pass-through characters / ''.
       */
      public function get_all_py($chinese, $charset = 'utf-8')
      {
          $chinese = (string) $chinese;
          if (strtolower(trim((string) $charset)) !== 'gb2312') {
              $chinese = $this->_U2_Utf8_Gb($chinese);
          }

          return $this->zh_to_pys($chinese);
      }

      /**
       * Concatenates the first letter of every syllable,
       * e.g. ['zhang', 'san', 'feng'] => 'zsf'.
       *
       * Empty and non-scalar entries are skipped; a non-array argument
       * yields '' instead of raising an error.
       *
       * @param array $all_pys Result of get_all_py().
       * @return string
       */
      public function get_first_py($all_pys)
      {
          if (!is_array($all_pys)) {
              return '';
          }

          $initials = '';
          foreach ($all_pys as $one) {
              $syllable = is_scalar($one) ? (string) $one : '';
              if ($syllable === '') {
                  continue;
              }
              $initials .= $syllable[0];
          }

          return $initials;
      }

      /**
       * Returns the first letter of the first non-empty syllable,
       * e.g. ['zhang', 'san', 'feng'] => 'z'.
       *
       * @param array $all_pys Result of get_all_py().
       * @return string One character, or '' when there is no usable entry.
       */
      public function get_first_letter($all_pys)
      {
          $initials = $this->get_first_py($all_pys);

          // Index access instead of substr(): substr('', 0, 1) is false before PHP 8.
          return $initials === '' ? '' : $initials[0];
      }

      /**
       * Converts a UTF-8 string to GB2312 bytes, one character at a time.
       *
       * The previous implementation treated the argument as an integer code
       * point, which only worked through PHP 7's loose string/int comparison
       * and produced an empty string on PHP 8. Converting per character also
       * keeps one unmappable character from discarding the whole string.
       *
       * @param string $_C UTF-8 text.
       * @return string GB2312 bytes; '' for empty or invalid UTF-8 input.
       */
      private function _U2_Utf8_Gb($_C)
      {
          $text = (string) $_C;
          if ($text === '') {
              return '';
          }

          $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
          if ($chars === false) {
              return ''; // not valid UTF-8
          }

          // 0xA1A1 lies below the lookup range, so zh_to_py() turns it into the
          // blank value: one '' entry per character that cannot be converted.
          $placeholder = "\xA1\xA1";
          $useIconv = function_exists('iconv');
          $useMb = !$useIconv && function_exists('mb_convert_encoding');

          $gb = '';
          // iconv raises a notice for unmappable characters; swallow it locally.
          set_error_handler(static function () {
              return true;
          });
          try {
              foreach ($chars as $char) {
                  if (strlen($char) === 1) {
                      $gb .= $char; // ASCII is identical in GB2312
                      continue;
                  }

                  if ($useIconv) {
                      $converted = iconv('UTF-8', 'GB2312', $char);
                  } elseif ($useMb) {
                      $converted = mb_convert_encoding($char, 'EUC-CN', 'UTF-8');
                  } else {
                      $converted = false;
                  }

                  // A non-ASCII GB2312 character is always two bytes; anything
                  // else (false, '', '?', '*') is a failed or substituted conversion.
                  $gb .= (is_string($converted) && strlen($converted) === 2) ? $converted : $placeholder;
              }
          } finally {
              restore_error_handler();
          }

          return $gb;
      }

      /**
       * Maps one numeric code to its output string.
       *
       * @param int    $num   A single byte value (1..159 is returned as that
       *                      character) or a two-byte code minus 65536.
       * @param string $blank Value returned for codes outside the table range.
       * @return string
       */
      private function zh_to_py($num, $blank = '')
      {
          if ($num > 0 && $num < 160) {
              return chr($num);
          }
          if ($num < -20319 || $num > -10247) {
              return $blank;
          }

          // The table is sorted ascending: the answer is the last syllable whose
          // starting code does not exceed $num.
          $result = $blank;
          foreach ($this->dict_list as $py => $code) {
              if ($code > $num) {
                  break;
              }
              $result = $py;
          }

          return $result;
      }

      /**
       * Walks a GB2312 byte string and converts every character.
       *
       * @param string $chinese GB2312 bytes.
       * @return array One entry per character.
       */
      private function zh_to_pys($chinese)
      {
          $chinese = (string) $chinese;
          $length = strlen($chinese);
          $result = [];

          for ($i = 0; $i < $length; $i++) {
              $p = ord($chinese[$i]);
              if ($p > 160) {
                  if ($i + 1 >= $length) {
                      // Truncated two-byte character: report it as unknown rather
                      // than combining the lead byte with a phantom 0 trail byte.
                      $result[] = '';
                      break;
                  }
                  $q = ord($chinese[++$i]);
                  $p = $p * 256 + $q - 65536;
              }
              $result[] = $this->zh_to_py($p);
          }

          return $result;
      }
  }
"小礼物走一走,来肥宅自学平台支持我"
评论区

评论

共 0 条评论
  • 这篇文章还没有收到评论,赶紧来抢沙发吧~
客服中心在线客服
全心全意为平台用户服务
Copyright © 2017-2023 自学平台网站地图:去查看>
  • 肥宅人自己的自学平台
  • 人人都能成为平台讲师
  • 贡献你的力量壮大肥宅自学平台
  • 将自己的能力变现
  • 实现人生的第二职业
网址收藏平台安卓APP
微信公众号微信公众号

本站部分图片或者资源来自程序自动采集或卖家(商家)发布,如果侵犯了您的权益请与我们联系,我们将在24小时内删除!谢谢!

肥宅自学教程网是一个主打IT视频教程、自媒体运行、摄影剪辑等内容的资源学习交流平台。