<!-- Set Window Title
Deleted on 2008-03-20 by Peter Lee for separating header.php into: header.php, title, header2.php
-->
Note: part of this article is referenced from RFC2279 - UTF-8, a transformation format of ISO 10646.
I have been practising my C programming skills. Some friends of mine had already written code for this conversion, but it was not very readable, so I learned the rules for converting between Unicode and UTF-8 and wrote the two functions shown below.
Please focus on the functions fnUnicode2UTF8() and fnUTF82Unicode() in the source code. Don't be confused by the output-formatting part of the main() function :-)
<at>
http://www.peterlee.com.cn
http://blog.peterlee.com.cn
#include <stdio.h>
#include <string.h>
/*
 * Maximum number of bytes in one UTF-8 sequence under RFC 2279.
 * The converters in this file work on buffers of MAX + 1 chars:
 * indices 0 .. MAX-1 hold the encoded bytes and index MAX holds the
 * length of the sequence.
 */
#define MAX 6

/*
 * Encode one code point as a UTF-8 byte sequence (RFC 2279 layout,
 * i.e. up to six bytes for values up to 0x7FFFFFFF).
 *
 * unicode: the code point to encode.
 * UTF8:    output buffer with room for at least MAX + 1 chars.
 *          On return UTF8[MAX] holds the number of bytes written to
 *          UTF8[0 .. length-1]; bytes beyond the length are not touched.
 *
 * A value above 0x7FFFFFFF cannot be encoded. In that case no bytes are
 * written and UTF8[MAX] is set to 0 so the caller never reads a stale
 * length.
 */
void fnUnicode2UTF8(unsigned long unicode, char UTF8[])
{
    /* Largest code point that fits in a sequence of 1, 2, ... 6 bytes. */
    static const unsigned long limit[MAX] = {
        0x0000007FUL, 0x000007FFUL, 0x0000FFFFUL,
        0x001FFFFFUL, 0x03FFFFFFUL, 0x7FFFFFFFUL
    };
    /* Marker bits of the first byte for a sequence of 1, 2, ... 6 bytes. */
    static const unsigned char lead[MAX] = {
        0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
    };
    int len = 1;
    int i;

    /* Pick the shortest sequence length able to hold the value. */
    while (len <= MAX && unicode > limit[len - 1]) {
        ++len;
    }
    if (len > MAX) {
        UTF8[MAX] = 0;
        return;
    }

    UTF8[MAX] = (char)len;

    /* Continuation bytes carry six payload bits each, lowest bits last. */
    for (i = len - 1; i > 0; --i) {
        UTF8[i] = (char)(0x80 | (unicode & 0x3F));
        unicode >>= 6;
    }

    /* Whatever is left fits in the payload bits of the first byte. */
    UTF8[0] = (char)(lead[len - 1] | unicode);
}
/*
 * Decode the UTF-8 sequence (RFC 2279 layout, one to six bytes) that
 * starts at UTF8[0] and return its code point.
 *
 * UTF8: buffer holding the sequence; it must have room for MAX + 1 chars
 *       because UTF8[MAX] is written with the length of the decoded
 *       sequence.
 *
 * Returns the decoded code point. If the first byte is not a valid lead
 * byte, or a following byte does not have the 10xxxxxx continuation
 * pattern, the function returns 0 and sets UTF8[MAX] to 0. Decoding
 * stops at the first bad byte, so a truncated sequence is not read past
 * its terminator. Overlong encodings are not rejected.
 */
unsigned long fnUTF82Unicode(char UTF8[])
{
    /* Work on unsigned bytes: plain char may be signed. */
    const unsigned char first = (unsigned char)UTF8[0];
    unsigned long unicode;
    int len;
    int i;

    /* The run of leading 1 bits in the first byte gives the length. */
    if (first < 0x80) {
        len = 1;
        unicode = first;
    } else if ((first & 0xE0) == 0xC0) {
        len = 2;
        unicode = first & 0x1F;
    } else if ((first & 0xF0) == 0xE0) {
        len = 3;
        unicode = first & 0x0F;
    } else if ((first & 0xF8) == 0xF0) {
        len = 4;
        unicode = first & 0x07;
    } else if ((first & 0xFC) == 0xF8) {
        len = 5;
        unicode = first & 0x03;
    } else if ((first & 0xFE) == 0xFC) {
        len = 6;
        unicode = first & 0x01;
    } else {
        /* 10xxxxxx, 0xFE and 0xFF can never start a sequence. */
        UTF8[MAX] = 0;
        return 0;
    }

    /* Each continuation byte contributes six more payload bits. */
    for (i = 1; i < len; ++i) {
        const unsigned char next = (unsigned char)UTF8[i];

        if ((next & 0xC0) != 0x80) {
            UTF8[MAX] = 0;
            return 0;
        }
        unicode = (unicode << 6) | (unsigned long)(next & 0x3F);
    }

    UTF8[MAX] = (char)len;
    return unicode;
}
char Hex2Bin[23][5] = {"0000", "0001", "0010", "0011",
"0100", "0101", "0110", "0111",
"1000", "1001",
"", "", "", "", "", "", "",
"1010", "1011",
"1100", "1101", "1110", "1111"};
void fnHex2Bin(char hex[], char bin[])
{
int i, len = strlen(hex);
for ( bin[0] = i = 0; i < len; ++i )
strcat ( bin, Hex2Bin[hex[i]-'0'] );
}
/*
 * Demo driver: encodes the code point 0x4F60 to UTF-8, prints both forms
 * in hex and binary, then decodes the bytes again and prints the result.
 *
 * The command-line arguments are not used. Always returns 0.
 */
int main(int argc, char* argv[])
{
    char UTF8[MAX + 1];
    char hex[2 * MAX + 1];      /* two hex digits per byte, up to MAX bytes */
    char bin[4 * 2 * MAX + 1];  /* four binary digits per hex digit */
    unsigned long unicode = 0x4F60;
    int len;
    int i;

    (void)argc;
    (void)argv;

    printf("Unicode 2 UTF8:\n");
    /* unicode is unsigned long, so the conversion needs the l modifier. */
    snprintf(hex, sizeof hex, "%lX", unicode);
    fnHex2Bin(hex, bin);
    printf("Unicode Hex: %s\n", hex);
    printf("Unicode Bin: %s\n", bin);

    fnUnicode2UTF8(unicode, UTF8);
    len = UTF8[MAX];            /* the encoder stores the byte count here */
    for (i = 0; i < len; ++i) {
        snprintf(hex + 2 * i, sizeof hex - (size_t)(2 * i), "%02X",
                 (unsigned char)(UTF8[i]));
    }
    hex[2 * len] = '\0';
    fnHex2Bin(hex, bin);
    /* hex is always exactly 2 * len characters, so no field width is needed. */
    printf(" UTF Hex: %s\n", hex);
    printf(" UTF Bin: %s\n", bin);
    printf("\n");

    printf("UTF8 2 Unicode:\n");
    printf(" UTF Hex: %s\n", hex);
    printf(" UTF Bin: %s\n", bin);
    unicode = fnUTF82Unicode(UTF8);
    snprintf(hex, sizeof hex, "%lX", unicode);
    fnHex2Bin(hex, bin);
    printf("Unicode Hex: %s\n", hex);
    printf("Unicode Bin: %s\n", bin);

    return 0;
}
分享到:
相关推荐
离线安装包,亲测可用
您可以使用 AWS Schema Conversion Tool (AWS SCT) 将现有的数据库架构从一个数据库引擎转换为另一个数据库引擎。您可以转换关系 OLTP 架构或数据仓库架构。转换后的架构适用于 Amazon Relational Database Service ...
Se-N 共价键的本质:Se-N成键相互作用和非键相互作用的相互转化,谢萌,高军,本文采用密度泛函理论方法计算和分析了三类吡啶衍生物[R-C5H4N, pyridine (R=H), methylpyridine (R=CH3), 4-dimethylamino-pryidine (R...
understanding-and-minimising-adc-conversion-errors-stmicroelectronics.pdf
AbstractƩ-Δ analog-to-digital converters are widely used in motor drives where high signal integrity and galvanic isolation are required. While the Σ-Δ technology itself is well understood, the ...
微小的 4.3描述Tiny-utf8是一个库,用于将Unicode非常轻松地集成到任意C ++ 11项目中。 该库仅由类utf8_string组成,该类utf8_string替代std::string 。 它的实现成功地介于小内存占用和快速访问之间。 因此, std::...
python库,解压后可用。 资源全名:Geode_Conversion-2.12.3-cp38-cp38-win_amd64.whl
python库,解压后可用。 资源全名:Dataconversion-1.0-py3-none-any.whl
资源分类:Python库 所属语言:Python 使用前提:需要解压 资源全名:Geode_Conversion-2.12.2-cp38-cp38-win_amd64.whl 资源来源:官方 安装方法:https://lanzao.blog.csdn.net/article/details/101784059
We propose and experimentally demonstrate mutual optical format conversion between signals characterized as 10-Gb/s nonreturn-to-zero on-off-keying (NRZ-OOK) and NRZ binary phase-shift keying (BPSK) ...
官方离线安装包,亲测可用
资源来自pypi官网。 资源全名:Dataconversion-1.0-py3-none-any.whl
[OPAMP] Principles of Data Conversion System Design (IEEE Press - Behzad Razavi)_p136.pdf
SAP Unicode Conversion Guide
Sampling Theory and Analog-to-Digital Conversion --2016 [204].pdf
python库。 资源全名:Geode_Conversion-2.11.1-cp36-cp36m-win_amd64.whl
资源来自pypi官网。 资源全名:rios.conversion-0.2.1-py2.py3-none-any.whl
查了很多资料,总结一份这三个类型互转的函数,语言用c语言写,工程用visual studio写的,没有的话把相关c文件复制出来用就好