php

位置:IT落伍者 >> php >> 浏览文章

用PHP实现GB2312和Unicode间的编码转换


发布日期:2018年03月02日
 
用PHP实现GB2312和Unicode间的编码转换

GB2312 和 Unicode 间的编码转换

下面的例子是将 GB2312 文本转换为 &#NNNNN; 这种形式(HTML 十进制数字字符引用)

PHP 提供 iconv 函数以后,转换变得很方便,只是需要自己写一个 UTF-8 到 Unicode 码点的转换函数

查表(gb2312.txt)也行

<?php

// Demo: print each character of a GB2312 string as an HTML decimal
// character reference (&#NNNNN;).
//
// NOTE: iconv() below is told the input is GB2312, so this literal must
// contain GB2312 bytes, i.e. the script file itself has to be saved as GB2312.
$text = "电子书库";

// Split into characters: an optional high (lead) byte followed by any byte.
// Without the "u" modifier PCRE works on raw bytes, which is what we need here.
preg_match_all('/[\x80-\xff]?./', $text, $ar);

foreach ($ar[0] as $v) {
    // GB2312 bytes -> UTF-8 bytes -> Unicode code point.
    echo "&#" . utf_unicode(iconv("GB2312", "UTF-8", $v)) . ";";
}

?>

<?

// utf > unicode

/**
 * Decode ONE UTF-8 encoded character into its Unicode code point.
 *
 * The byte length of $c selects the UTF-8 form:
 *   1 byte : 0xxxxxxx
 *   2 bytes: 110xxxxx 10xxxxxx
 *   3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
 *   4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * No validation of the lead/continuation bit patterns is performed; the
 * payload bits are simply masked out and concatenated.
 *
 * @param string $c Exactly one UTF-8 character (1 to 4 bytes).
 * @return int|null The code point, or null when $c is not 1-4 bytes long.
 */
function utf_unicode($c) {
    switch (strlen($c)) {
        case 1:
            return ord($c);

        case 2:
            // 5 payload bits in the lead byte, 6 in the continuation byte.
            return ((ord($c[0]) & 0x1F) << 6)
                 |  (ord($c[1]) & 0x3F);

        case 3:
            // 4 payload bits in the lead byte, then 6 + 6.
            return ((ord($c[0]) & 0x0F) << 12)
                 | ((ord($c[1]) & 0x3F) << 6)
                 |  (ord($c[2]) & 0x3F);

        case 4:
            // 3 payload bits in the lead byte, then 6 + 6 + 6.
            return ((ord($c[0]) & 0x07) << 18)
                 | ((ord($c[1]) & 0x3F) << 12)
                 | ((ord($c[2]) & 0x3F) << 6)
                 |  (ord($c[3]) & 0x3F);
    }

    // Empty input or more than 4 bytes: not a single UTF-8 character.
    return null;
}

?>

下面的例子是利用 PHP 将 &#NNNNN; 这种编码转换为 GB2312

<?php

// Demo: turn HTML decimal character references back into GB2312 text.
// The seven references below are the code points of 全天候自动聚焦.
$str = "TTL&#20840;&#22825;&#20505;&#33258;&#21160;&#32858;&#28966;";

// Replace every &#N; (1-5 decimal digits) with the GB2312 bytes of code point N.
// A callback is used instead of generating PHP source and eval()-ing it:
// eval() on text that may come from outside is a code-injection risk.
$str = preg_replace_callback(
    '|&#([0-9]{1,5});|',
    function ($m) {
        return uutfgb((int) $m[1]);
    },
    $str
);

echo $str; // TTL全天候自动聚焦 (as GB2312 bytes)

/**
 * Convert a Unicode code point to the GB2312 bytes of that character.
 *
 * The code point is first encoded as UTF-8 by hand and the result is then
 * handed to iconv() for the UTF-8 -> GB2312 step.
 *
 * @param int $c Unicode code point (numeric strings are cast to int).
 * @return string|false GB2312 bytes; "" for a code point outside
 *                      0..0x10FFFF; false when iconv() cannot represent
 *                      the character in GB2312.
 */
function uutfgb($c) {
    $c = (int) $c;
    $str = "";

    if ($c < 0) {
        // Not a code point; leave $str empty.
    } else if ($c < 0x80) {
        // ASCII: emit the character itself, not the decimal digits of $c.
        $str .= chr($c);
    } else if ($c < 0x800) {
        $str .= chr(0xC0 | ($c >> 6));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x10000) {
        $str .= chr(0xE0 | ($c >> 12));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x110000) {
        // Upper bound is the last valid Unicode code point + 1.
        $str .= chr(0xF0 | ($c >> 18));
        $str .= chr(0x80 | (($c >> 12) & 0x3F));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    }

    return iconv('UTF-8', 'GB2312', $str);
}

?>

或者是

/**
 * Decode JavaScript-escape()/HTML style Unicode escapes into GB2312 text.
 *
 * Recognised tokens (after rawurldecode() has handled plain %XX escapes):
 *   %uXXXX     four hex digits (JavaScript escape())
 *   &#xXXXX;   four hex digits (HTML hexadecimal reference)
 *   &#N;       decimal reference; only the low 16 bits are used
 * Everything else is copied through unchanged.
 *
 * @param string $str Input text containing escapes.
 * @return string Text with every recognised escape replaced by its GB2312
 *                bytes. An escape that iconv() cannot convert is left as is.
 */
function unescape($str) {
    $str = rawurldecode($str);

    // U = ungreedy, so the trailing ".+" consumes one byte at a time and never
    // swallows the start of an escape; s = "." also matches newlines, so line
    // breaks are not dropped from the output.
    preg_match_all('/%u[0-9a-fA-F]{4}|&#x[0-9a-fA-F]{4};|&#\d+;|.+/Us', $str, $r);
    $parts = $r[0];

    foreach ($parts as $k => $v) {
        if (substr($v, 0, 2) === '%u') {
            $ucs = pack('H4', substr($v, -4));
        } elseif (substr($v, 0, 3) === '&#x') {
            $ucs = pack('H4', substr($v, 3, -1));
        } elseif (substr($v, 0, 2) === '&#') {
            $ucs = pack('n', (int) substr($v, 2, -1));
        } else {
            continue; // plain text, keep unchanged
        }

        // pack('H4') and pack('n') both yield big-endian bytes, so name the
        // byte order explicitly instead of relying on iconv's platform default.
        $gb = iconv('UCS-2BE', 'GB2312', $ucs);
        if ($gb !== false) {
            $parts[$k] = $gb;
        }
    }

    return join('', $parts);
}

// Demo input: "TTL" followed by the decimal references for 全天候自动聚焦.
$str = "TTL&#20840;&#22825;&#20505;&#33258;&#21160;&#32858;&#28966;";

echo unescape($str); // outputs: TTL全天候自动聚焦 (as GB2312 bytes)

利用javascript来转换

<style>
/* NOTE(review): the hyphens in the property names and every numeric value
   were lost from the source text. The property names are restored; the
   numbers below (9pt text, zero padding, 15pt inputs) are assumed defaults
   and should be confirmed against the original page. */
body {
    font-size: 9pt;
    padding: 0;
}

input {
    font-size: 9pt;
    height: 15pt;
}
</style>

<script language=JavaScript>

/*

This following code are designed and writen by Windy_sk <>

You can use it freely but u must held all the copyright items!

*/

/**
 * Encode every UTF-16 code unit of a string as an HTML decimal character
 * reference, e.g. "A" -> "&#65;".
 *
 * @param {string} str Text to encode.
 * @returns {string} Concatenated "&#N;" references, "" for empty input.
 */
function StrUnicode(str){
    var arr = [];
    for (var i = 0; i < str.length; i++) {
        arr[i] = "&#" + str.charCodeAt(i) + ";";
    }
    // join("") concatenates directly; no comma clean-up pass is needed.
    return arr.join("");
}

/**
 * Decode HTML decimal character references ("&#N;", 1-5 digits) back into
 * characters. Only the references are decoded and returned; any other text
 * in the input is discarded, and "" is returned when there are none.
 *
 * @param {string} str Text containing "&#N;" references.
 * @returns {string} The decoded characters, in order of appearance.
 */
function UnicodeoStr(str){
    // Decimal digits only: hex letters would make the number parse as NaN.
    var re = /&#(\d{1,5});/g;
    var out = [];
    var m;
    while ((m = re.exec(str)) !== null) {
        out.push(String.fromCharCode(parseInt(m[1], 10)));
    }
    // join("") rather than toString() + comma stripping, which would also
    // delete a legitimately decoded "," (&#44;).
    return out.join("");
}

/**
 * Click handler for the converter form (form name "text").
 *
 * The "method" checkbox selects the preferred direction:
 *   checked   -> encode "decode" into "encode"; if "decode" is empty,
 *                decode "encode" into "decode" instead.
 *   unchecked -> decode "encode" into "decode"; if "encode" is empty,
 *                encode "decode" into "encode" instead.
 */
function modi_str(){
    // form.elements is used because a control named "method" collides with
    // the form's own "method" property when accessed directly on the form.
    var fields = document.forms["text"].elements;
    var plain = fields["decode"];
    var coded = fields["encode"];

    var encodeFirst = fields["method"].checked;
    var primaryEmpty = encodeFirst ? plain.value == "" : coded.value == "";

    // Encode when that is the preferred direction and there is input for it,
    // or when decoding is preferred but there is nothing to decode.
    if (encodeFirst !== primaryEmpty) {
        coded.value = StrUnicode(plain.value);
    } else {
        plain.value = UnicodeoStr(coded.value);
    }
}

</script>

<title>Unicode</title>

<!-- NOTE(review): the textarea cols/rows values were lost from the source
     text; 80x6 is an assumed size. -->
<form name="text">

文本原型<br>

<textarea name="decode" cols="80" rows="6"></textarea>

<br>

转换代码<br>

<textarea name="encode" cols="80" rows="6"></textarea>

<br>

<input type="checkbox" name="method" checked> 正向转换

<input type="button" onclick="modi_str()" value=" 确 定 ">

<input type="reset" value=" 清 空 ">

<!-- Selects the output box of the currently chosen direction. -->
<input type="button" onclick="(this.form.elements['method'].checked ? this.form.elements['encode'] : this.form.elements['decode']).select()" value=" 全 选 ">

</form>

下面是一个查看全角、半角等各区段字符的例子

<style>
/* NOTE(review): the hyphens in the property names and every numeric value
   were lost from the source text. The property names are restored; the
   numbers below (9pt text, zero padding, 15pt inputs) are assumed defaults
   and should be confirmed against the original page. */
body {
    font-size: 9pt;
    padding: 0;
}

input {
    font-size: 9pt;
    height: 15pt;
}
</style>

<script>

/**
 * Render every character with a code point in [min, max] into the iframe
 * whose id is "show", as a run of "&#N;" references preceded by a
 * "min - max" heading.
 *
 * @param {number} min First code point (inclusive).
 * @param {number} max Last code point (inclusive).
 */
function showUni(min,max){
    // Resolve the frame explicitly; an element with only an id is not
    // reliably exposed as a global window object.
    var doc = document.getElementById("show").contentWindow.document;

    // Build the whole body first so the document is written to once.
    var refs = [];
    for (var i = min; i <= max; i++) {
        refs.push("&#" + i + ";");
    }

    doc.open();
    // NOTE(review): the font size was lost from the source text; 9pt is assumed.
    doc.writeln("<style>body{font-size:9pt;word-break:break-all;}</style>");
    doc.writeln(min + " - " + max + "<br><br>");
    doc.write(refs.join(""));
    doc.close();
}

</script>

<!-- NOTE(review): every numeric range, the digits in the two "中文" labels
     and the iframe size were lost from the source text. The ranges below are
     rebuilt from the Unicode block boundaries (decimal) and are assumptions:
       0-255        Basic Latin + Latin-1 (half-width)
       65280-65519  Halfwidth and Fullwidth Forms
       19968-40869  CJK Unified Ideographs, split into two halves
       12352-12447  Hiragana
       12448-12543  Katakana
       44032-55203  Hangul Syllables -->
<input type="button" value="半角" onclick="showUni(0,255)">

<input type="button" value="全角" onclick="showUni(65280,65519)">

<input type="button" value="中文1" onclick="showUni(19968,30418)">

<input type="button" value="中文2" onclick="showUni(30419,40869)">

<input type="button" value="日文平" onclick="showUni(12352,12447)">

<input type="button" value="日文片" onclick="showUni(12448,12543)">

<input type="button" value="韩文" onclick="showUni(44032,55203)">

<br>自定义<input name="min" id="min"> <input name="max" id="max">

<input type="button" value="察看" onclick="showUni(parseInt(document.getElementById('min').value, 10), parseInt(document.getElementById('max').value, 10))">

<br>

<iframe src="about:blank" id="show" width="100%" height="300"></iframe>

下面是一个通过查表(gb2312.txt)将 GB2312 转换为 UTF-8 的例子。现在有了 iconv 函数,这个方法已经没有太大的意义了

<?

/**
 * Convert a GB2312 byte string to UTF-8 using a text mapping table.
 *
 * The table is expected to contain lines of the form
 *     0x3021<whitespace>0x554A ...
 * where the first number is the GB2312 code WITHOUT the high bits (i.e. the
 * two raw bytes minus 0x8080) and the second is the Unicode code point.
 * Lines that do not match this shape (comments, blanks) are ignored.
 *
 * @param string $gb       GB2312 encoded text.
 * @param string $filename Path of the mapping table.
 * @return string UTF-8 text. A double-byte character with no table entry,
 *                or a lead byte cut off at the end of the input, becomes
 *                U+FFFD. Empty or whitespace-only input is returned as is.
 */
function gbutf($gb, $filename = 'gb2312.txt') {
    // Parsed tables are kept per file so repeated calls do not re-read it.
    static $tables = [];

    $gb = (string) $gb;
    if (trim($gb) === '') {
        return $gb;
    }

    if (!isset($tables[$filename])) {
        $lines = is_readable($filename) ? file($filename) : false;
        $table = [];
        if ($lines === false) {
            // Keep going: ASCII still converts, the rest becomes U+FFFD.
            trigger_error("gbutf: cannot read mapping table '$filename'", E_USER_WARNING);
        } else {
            foreach ($lines as $line) {
                if (preg_match('/^0x([0-9A-Fa-f]{4})\s+0x([0-9A-Fa-f]{4})/', $line, $m)) {
                    $table[hexdec($m[1])] = hexdec($m[2]);
                }
            }
            $tables[$filename] = $table; // cache successful loads only
        }
    } else {
        $table = $tables[$filename];
    }

    $replacement = "\xEF\xBF\xBD"; // U+FFFD REPLACEMENT CHARACTER
    $utf = '';
    $len = strlen($gb);

    // Walk by index: no repeated substr() copies, and a remaining "0" does
    // not end the loop early the way a truthiness test on the string would.
    for ($i = 0; $i < $len; $i++) {
        $byte = ord($gb[$i]);

        if ($byte <= 127) {
            $utf .= $gb[$i]; // ASCII is identical in UTF-8
            continue;
        }

        if ($i + 1 >= $len) {
            $utf .= $replacement; // lead byte without its trail byte
            continue;
        }

        $code = (($byte << 8) | ord($gb[$i + 1])) - 0x8080;
        $utf .= isset($table[$code]) ? uutf($table[$code]) : $replacement;
        $i++; // consumed the trail byte as well
    }

    return $utf;
}

/**
 * Encode a Unicode code point as UTF-8.
 *
 * @param int $c Unicode code point (numeric strings are cast to int).
 * @return string 1-4 UTF-8 bytes, or "" when $c is outside 0..0x10FFFF.
 */
function uutf($c) {
    $c = (int) $c;
    $str = "";

    if ($c < 0) {
        // Not a code point; return "".
    } else if ($c < 0x80) {
        // ASCII: emit the character itself, not the decimal digits of $c.
        $str .= chr($c);
    } else if ($c < 0x800) {
        $str .= chr(0xC0 | ($c >> 6));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x10000) {
        $str .= chr(0xE0 | ($c >> 12));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x110000) {
        // Upper bound is the last valid Unicode code point + 1.
        $str .= chr(0xF0 | ($c >> 18));
        $str .= chr(0x80 | (($c >> 12) & 0x3F));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    }

    return $str;
}

?>               

上一篇:php:修改NetBeans默认字体

下一篇:关于php 接口问题(php接口主要也就是运用curl,curl函数)