GB2312 和 Unicode 间的编码转换
下面的例子是将 GB2312 文本转换为 &#NNNNN; 这种形式（十进制数值字符引用）
PHP 4.3.0 以后的 iconv 函数很好用，只是需要自己写一个 UTF-8 到 Unicode 的转换函数
查表（gb2312.txt）也行
<?
// Demo: print every character of a GB2312 string as a decimal numeric
// character reference (&#NNNNN;).
// NOTE(review): the literal below must reach PHP as GB2312 bytes, i.e. this
// file has to be saved in GB2312 for the iconv() call to be correct - confirm.
$text = "电子书库";
// One match per character: an optional lead byte (0x80-0xFF) followed by
// exactly one more byte, so double-byte characters stay together.
preg_match_all('/[\x80-\xff]?./', $text, $ar);
foreach ($ar[0] as $v) {
    echo '&#' . utf_unicode(iconv('GB2312', 'UTF-8', $v)) . ';';
}
?>
<?
// utf > unicode
/**
 * Decode one UTF-8 encoded character into its Unicode code point.
 *
 * The byte length of $c selects the decoding rule, so $c must hold exactly
 * one character (1 to 4 bytes). The bytes are not validated.
 *
 * @param string $c A single UTF-8 encoded character.
 * @return int|null The code point, or null when $c is not 1-4 bytes long.
 */
function utf_unicode($c) {
    switch (strlen($c)) {
        case 1:
            return ord($c);
        case 2:
            // 110xxxxx 10xxxxxx
            $n = (ord($c[0]) & 0x1f) << 6;
            $n += ord($c[1]) & 0x3f;
            return $n;
        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx
            $n = (ord($c[0]) & 0x0f) << 12;
            $n += (ord($c[1]) & 0x3f) << 6;
            $n += ord($c[2]) & 0x3f;
            return $n;
        case 4:
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $n = (ord($c[0]) & 0x07) << 18;
            $n += (ord($c[1]) & 0x3f) << 12;
            $n += (ord($c[2]) & 0x3f) << 6;
            $n += ord($c[3]) & 0x3f;
            return $n;
    }
    // Empty input or more than four bytes: nothing to decode.
    return null;
}
?>
下面的例子是利用 PHP 将 &#NNNNN; 这种编码转换为 GB2312
<?php
// Demo: turn decimal numeric character references (&#NNNNN;) into GB2312 text.
$str = 'TTL&#20840;&#22825;&#20505;&#33258;&#21160;&#32858;&#28966;';
// Split on the references instead of building PHP source and eval()-ing it:
// with PREG_SPLIT_DELIM_CAPTURE the captured digits sit at the odd indexes
// and the untouched text between references at the even ones. Quotes or "$"
// in the input therefore can never be executed as code.
$parts = preg_split('|&#([0-9]{1,5});|', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
$str = '';
foreach ($parts as $i => $part) {
    $str .= ($i % 2) ? uutfgb((int) $part) : $part;
}
echo $str;
/**
 * Convert one Unicode code point to GB2312 bytes.
 *
 * The code point is first encoded as UTF-8 and then handed to iconv().
 *
 * @param int $c Unicode code point.
 * @return string|false GB2312 bytes; an empty string for a code point outside
 *                      0..0x10FFFF; whatever iconv() returns (false on
 *                      failure) when the character cannot be converted.
 */
function uutfgb($c){
    $c = (int) $c;
    $str = '';
    if ($c < 0) {
        // Not a code point: leave $str empty.
    } else if ($c < 0x80) {
        // Emit the character itself, not the decimal digits of its number.
        $str .= chr($c);
    } else if ($c < 0x800) {
        $str .= chr(0xC0 | ($c >> 6));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x10000) {
        $str .= chr(0xE0 | ($c >> 12));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x110000) {
        $str .= chr(0xF0 | ($c >> 18));
        $str .= chr(0x80 | (($c >> 12) & 0x3F));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    }
    return iconv('UTF-8', 'GB2312', $str);
}
?>
或者是
/**
 * Decode escaped Unicode characters in a string to GB2312.
 *
 * After rawurldecode(), three escape forms are recognised:
 *   %uXXXX    (four hex digits)
 *   &#xXXXX;  (four hex digits)
 *   &#NNNNN;  (decimal)
 * Everything else is copied through unchanged. Only code points up to 0xFFFF
 * are converted; a larger decimal reference is left as it was written.
 *
 * @param string $str Input text containing escapes.
 * @return string The text with each recognised escape replaced by the bytes
 *                iconv() produced (nothing when iconv() fails).
 */
function unescape($str) {
    $str = rawurldecode($str);
    // U: ungreedy, so the final ".+" alternative consumes one byte at a time
    //    and an escape starting at any position is still seen.
    // s: "." also matches newlines, otherwise they would vanish from the result.
    preg_match_all('/%u[0-9a-fA-F]{4}|&#x[0-9a-fA-F]{4};|&#\d+;|.+/Us', $str, $r);
    $ar = $r[0];
    foreach ($ar as $k => $v) {
        if (substr($v, 0, 2) == '%u') {
            // pack('H4') yields big-endian bytes, so name the byte order explicitly.
            $ar[$k] = iconv('UCS-2BE', 'GB2312', pack('H4', substr($v, -4)));
        } elseif (substr($v, 0, 3) == '&#x') {
            $ar[$k] = iconv('UCS-2BE', 'GB2312', pack('H4', substr($v, 3, -1)));
        } elseif (substr($v, 0, 2) == '&#') {
            $code = (int) substr($v, 2, -1);
            // pack('n') keeps only 16 bits; larger values would silently turn
            // into a different character, so those references stay untouched.
            if ($code <= 0xFFFF) {
                $ar[$k] = iconv('UCS-2BE', 'GB2312', pack('n', $code));
            }
        }
    }
    return join('', $ar);
}
$str = 'TTL&#20840;&#22825;&#20505;&#33258;&#21160;&#32858;&#28966;';
echo unescape($str); // prints the GB2312 bytes of "TTL全天候自动聚焦"
利用javascript来转换
<style>
/* NOTE(review): the numeric values were lost from the original text; the
   sizes below are placeholders - confirm against the intended layout. */
BODY {
FONT-SIZE: 9pt; PADDING-RIGHT: 0px; PADDING-LEFT: 0px; PADDING-BOTTOM: 0px; PADDING-TOP: 0px;
}
input {
FONT-SIZE: 9pt; height: 15pt;
}
</style>
<script language=JavaScript>
/*
This following code are designed and writen by Windy_sk <>
You can use it freely but u must held all the copyright items!
*/
/**
 * Encode every UTF-16 code unit of a string as a decimal numeric character
 * reference, e.g. "A" becomes "&#65;".
 *
 * @param {string} str Text to encode.
 * @returns {string} Concatenated "&#NNNNN;" references, "" for empty input.
 */
function StrUnicode(str){
	var arr = [];
	for (var i = 0; i < str.length; i++) {
		arr.push("&#" + str.charCodeAt(i) + ";");
	}
	return arr.join("");
}
/**
 * Decode the decimal numeric character references ("&#NNNNN;") found in a
 * string. Text outside the references is not part of the result.
 *
 * @param {string} str Text containing references.
 * @returns {string} The decoded characters, "" when no reference is present.
 */
function UnicodeoStr(str){
	// Decimal digits only: String.fromCharCode cannot interpret hex digits.
	var re = /&#\d{1,5};/g;
	var arr = str.match(re);
	if (arr == null) return "";
	for (var i = 0; i < arr.length; i++) {
		arr[i] = String.fromCharCode(parseInt(arr[i].replace(/[&#;]/g, ""), 10));
	}
	// join("") instead of toString() plus comma stripping, which would also
	// delete a decoded "," character.
	return arr.join("");
}
/**
 * Button handler for the form named "text": converts between the "decode"
 * (plain text) and "encode" (character references) textareas.
 *
 * The "method" checkbox selects the preferred direction; when the preferred
 * source field is empty the conversion runs the other way.
 */
function modi_str(){
	// document.forms / elements instead of the IE-only document.all. elements[]
	// is used because the checkbox is named "method", which collides with the
	// form's own "method" property.
	var form = document.forms["text"];
	var decoded = form.elements["decode"];
	var encoded = form.elements["encode"];
	if (form.elements["method"].checked) {
		if (decoded.value != "") {
			encoded.value = StrUnicode(decoded.value);
		} else {
			decoded.value = UnicodeoStr(encoded.value);
		}
	} else {
		if (encoded.value != "") {
			decoded.value = UnicodeoStr(encoded.value);
		} else {
			encoded.value = StrUnicode(decoded.value);
		}
	}
}
</script>
<title>Unicode</title>
<!-- NOTE(review): the cols/rows values were lost from the original text; the
     numbers below are placeholders - confirm. -->
<form name="text">
文本原型<br>
<textarea name="decode" cols="100" rows="10"></textarea>
<br>
转换代码<br>
<textarea name="encode" cols="100" rows="10"></textarea>
<br>
<input type="checkbox" name="method" checked> 正向转换
<input type="button" onclick="modi_str()" value=" 确 定 ">
<input type="reset" value=" 清 空 ">
<input type="button" onclick="var f = this.form; (f.elements['method'].checked ? f.elements['encode'] : f.elements['decode']).select()" value=" 全 选 ">
</form>
下面是一个显示所有全角半角的字体的查看例子
<style>
/* NOTE(review): the numeric values were lost from the original text; the
   sizes below are placeholders - confirm against the intended layout. */
BODY {
FONT-SIZE: 9pt; PADDING-RIGHT: 0px; PADDING-LEFT: 0px; PADDING-BOTTOM: 0px; PADDING-TOP: 0px;
}
input {
FONT-SIZE: 9pt; height: 15pt;
}
</style>
<script>
/**
 * Render every code point from min to max (inclusive) into the iframe whose
 * id is "show", each written as a decimal numeric character reference.
 *
 * @param {number} min First code point.
 * @param {number} max Last code point.
 */
function showUni(min,max){
	// Resolve the iframe document explicitly rather than through an implicit
	// global named after the element id.
	var doc = document.getElementById("show").contentWindow.document;
	doc.open();
	// NOTE(review): the font size and the separator between min and max were
	// lost from the original text; "9pt" and " - " are placeholders - confirm.
	doc.writeln("<style>body{font-size:9pt;word-break:break-all;}</style>");
	doc.writeln(min + " - " + max + "<br><br>");
	for (var i = min; i <= max; i++) {
		doc.write("&#" + i + ";");
	}
	doc.close();
}
</script>
<!-- NOTE(review): the range arguments, the digits in the two "中文" labels and
     the iframe size were lost from the original text. The ranges below follow
     the Unicode block boundaries (Halfwidth and Fullwidth Forms, CJK Unified
     Ideographs split in two, Hiragana, Katakana, Hangul Syllables); the
     "半角" range and the iframe size are placeholders - confirm. -->
<input type="button" value="半角" onclick="showUni(32,255)">
<input type="button" value="全角" onclick="showUni(65280,65519)">
<input type="button" value="中文1" onclick="showUni(19968,30000)">
<input type="button" value="中文2" onclick="showUni(30001,40869)">
<input type="button" value="日文平" onclick="showUni(12352,12447)">
<input type="button" value="日文片" onclick="showUni(12448,12543)">
<input type="button" value="韩文" onclick="showUni(44032,55203)">
<br>自定义<input name="min"> <input name="max">
<input type="button" value="察看" onclick="showUni(parseInt(document.getElementsByName('min')[0].value, 10), parseInt(document.getElementsByName('max')[0].value, 10))">
<br>
<iframe src="about:blank" id="show" width="100%" height="60%" scroll="no"></iframe>
下面是一个查表（gb2312.txt）将 GB2312 转换为 UTF-8 的例子。现在有了 iconv 函数，这个方法已经没有太大的意义了
<?
/**
 * Convert a GB2312 string to UTF-8 with a lookup table read from the file
 * "gb2312.txt" in the current working directory.
 *
 * NOTE(review): assumes each mapping line starts with "0xXXXX<sep>0xYYYY",
 * where XXXX is the GB2312 code with the high bit of both bytes cleared and
 * YYYY the Unicode code point (the layout of the Unicode Consortium's
 * GB2312.TXT) - confirm against the table actually shipped.
 *
 * @param string $gb GB2312 encoded text.
 * @return string|false UTF-8 text; $gb itself when it is empty after trim();
 *                      false when the table cannot be read. Characters that
 *                      are missing from the table become "?".
 */
function gbutf($gb){
    if (!trim($gb)) return $gb;

    // Parse the table once per request instead of on every call.
    static $codetable = null;
    if ($codetable === null) {
        $filename = 'gb2312.txt';
        $lines = file($filename);
        if ($lines === false) {
            trigger_error('gbutf: cannot read mapping table ' . $filename, E_USER_WARNING);
            return false;
        }
        $codetable = array();
        foreach ($lines as $line) {
            // Comment and blank lines do not match and are skipped.
            if (preg_match('/^0x([0-9A-Fa-f]{4})\s+0x([0-9A-Fa-f]{4})/', $line, $m)) {
                $codetable[hexdec($m[1])] = hexdec($m[2]);
            }
        }
    }

    $utf = '';
    $len = strlen($gb);
    // Walk by offset: testing the remaining string for truthiness would stop
    // early as soon as the remainder is the single character "0".
    for ($i = 0; $i < $len; ) {
        if (ord($gb[$i]) > 127) {
            // Double-byte character: clear the high bits to get the table key.
            $pair = substr($gb, $i, 2);
            $i += 2;
            $key = hexdec(bin2hex($pair)) - 0x8080;
            $utf .= isset($codetable[$key]) ? uutf($codetable[$key]) : '?';
        } else {
            // ASCII bytes are identical in UTF-8.
            $utf .= $gb[$i];
            $i++;
        }
    }
    return $utf;
}
/**
 * Encode one Unicode code point as UTF-8.
 *
 * @param int|string $c Code point. A one-byte string is returned unchanged,
 *                      so a caller may pass a raw ASCII character straight
 *                      through; any longer string is read as a number.
 * @return string UTF-8 bytes, or an empty string for a code point outside
 *                0..0x10FFFF.
 */
function uutf($c){
    if (is_string($c)) {
        if (strlen($c) === 1) {
            return $c;
        }
        $c = (int) $c;
    }
    $c = (int) $c;
    $str = '';
    if ($c < 0) {
        // Not a code point: leave $str empty.
    } else if ($c < 0x80) {
        // Emit the character itself, not the decimal digits of its number.
        $str .= chr($c);
    } else if ($c < 0x800) {
        $str .= chr(0xC0 | ($c >> 6));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x10000) {
        $str .= chr(0xE0 | ($c >> 12));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    } else if ($c < 0x110000) {
        $str .= chr(0xF0 | ($c >> 18));
        $str .= chr(0x80 | (($c >> 12) & 0x3F));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    }
    return $str;
}
?>