码迷,mamicode.com
首页 > 其他好文 > 详细

UTF-8编码与Unicode UCS-2的转换

时间:2014-07-16 17:58:24      阅读:217      评论:0      收藏:0      [点我收藏+]

标签:style   blog   color   for   div   re   

/* Convert a UTF-8 string into a UCS-2 array.
   `str' specifies the NUL-terminated UTF-8 string to read.
   `ary' specifies the array receiving the UCS-2 code units.  At most one element is written
   per input byte, so an array with as many elements as `str' has bytes is always sufficient.
   `np' specifies the variable that receives the number of elements written to `ary'.
   Only one-, two- and three-byte sequences are decoded.  Following bytes are merely checked
   to be 0x80 or above; a lead byte whose following bytes fail that check, and any byte of
   0xf0 or above, is dropped without producing an element. */
void tcstrutftoucs(const char *str, uint16_t *ary, int *np){
  assert(str && ary && np);
  const unsigned char *rp = (const unsigned char *)str;
  unsigned int wi = 0;
  while(*rp != '\0'){
    unsigned int c = *rp;
    if(c < 0x80){
      /* single-byte (ASCII) character */
      ary[wi++] = (uint16_t)c;
    } else if(c < 0xe0){
      /* two-byte sequence: 5 payload bits from the lead, 6 from the follower;
         a NUL follower fails the test, so the terminator is never stepped over */
      if(rp[1] >= 0x80){
        ary[wi++] = (uint16_t)(((rp[0] & 0x1f) << 6) | (rp[1] & 0x3f));
        rp++;
      }
    } else if(c < 0xf0){
      /* three-byte sequence: 4 + 6 + 6 payload bits; `&&' short-circuits so rp[2]
         is only read when rp[1] is not the terminator */
      if(rp[1] >= 0x80 && rp[2] >= 0x80){
        ary[wi++] = (uint16_t)(((rp[0] & 0xf) << 12) | ((rp[1] & 0x3f) << 6) | (rp[2] & 0x3f));
        rp += 2;
      }
    }
    /* step past the lead byte (or the dropped byte) */
    rp++;
  }
  *np = (int)wi;
}


/* Convert a UCS-2 array into a UTF-8 string.
   `ary' specifies the array of UCS-2 code units to read.
   `num' specifies the number of elements of `ary' to convert; it must not be negative.
   `str' specifies the buffer receiving the UTF-8 string.  Each element produces at most
   three bytes and a terminating NUL is appended, so `num' * 3 + 1 bytes always suffice.
   The return value is the length in bytes of the resulting string, excluding the
   terminating NUL. */
int tcstrucstoutf(const uint16_t *ary, int num, char *str){
  assert(ary && num >= 0 && str);
  unsigned char *wp = (unsigned char *)str;
  for(int i = 0; i < num; i++){
    unsigned int c = ary[i];
    if(c < 0x80){
      /* fits in 7 bits: emitted as a single byte */
      *(wp++) = (unsigned char)c;
    } else if(c < 0x800){
      /* fits in 11 bits: two-byte sequence */
      *(wp++) = (unsigned char)(0xc0 | (c >> 6));
      *(wp++) = (unsigned char)(0x80 | (c & 0x3f));
    } else {
      /* everything else up to 16 bits: three-byte sequence */
      *(wp++) = (unsigned char)(0xe0 | (c >> 12));
      *(wp++) = (unsigned char)(0x80 | ((c & 0xfff) >> 6));
      *(wp++) = (unsigned char)(0x80 | (c & 0x3f));
    }
  }
  *wp = '\0';
  return (int)((char *)wp - str);
}

 

UTF-8编码与Unicode UCS-2的转换,布布扣,bubuko.com

UTF-8编码与Unicode UCS-2的转换

标签:style   blog   color   for   div   re   

原文地址:http://www.cnblogs.com/feika/p/3847503.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!