Mercurial > hg > Applications > mh
view sbr/ml_codeconv.c @ 7:c20e4181370f
utf-8 input assumption in case of base64/utf-8
author | kono |
---|---|
date | Sun, 04 Dec 2005 02:30:39 +0900 |
parents | d802748a597d |
children | 77780b728543 |
line wrap: on
line source
/* ml_codeconv.c - (multilingual) code conversion */
/*	by takada@seraph.ntt.jp */
/*	arranged by MH-plus project */

/*
 * Text is handled internally as ASCII plus two-byte sequences whose bytes
 * both satisfy IS_JEUC().  The output routines in this file re-encode such
 * text for a stream, and the input routines convert a string in place by
 * driving the nkf kanji filter that is #included near the end of the file.
 */

#ifdef JAPAN

#include "../h/mh.h"
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

/* coding system */
typedef int coding_system_t;
#define CS_DEFAULT 0
#define CS_JIS7 1
#define CS_JEUC 2
#define CS_SJIS 3
#define CS_UTF8 4
#define CS_NOCONV 99

/* coding system list */
#define CSL_SIZE 3
#define CSL_DISPLAY(csl) ((csl)[0])
#define CSL_FILE(csl) ((csl)[1])
#define CSL_PROCESS(csl) ((csl)[2])

/* codeset status */
#define ASCII 0
#define JISX0208 1

#define IS_JIS7(c) ((0x21 <= (c)) && ((c) <= 0x7e))
#define IS_JEUC(c) ((0xa1 <= (c)) && ((c) <= 0xfe))
#define IS_SJIS1(c) (((0x81 <= (c)) && ((c) <= 0x9f)) || \
                     ((0xe0 <= (c)) && ((c) <= 0xfc)))
#define IS_SJIS2(c) ((0x40 <= (c)) && ((c) <= 0xfc) && ((c) != 0x7f))

/* encoding (mhn) */
#define CE_UNKNOWN 0x00
#define CE_BASE64 0x01
#define CE_QUOTED 0x02
#define CE_8BIT 0x03
#define CE_7BIT 0x04
#define CE_BINARY 0x05
#define CE_EXTENSION 0x06
#define CE_EXTERNAL 0x07 /* for external-body */

/* hold coding system information */
static coding_system_t ml_coding_info[CSL_SIZE] = {
    CS_DEFAULT, CS_DEFAULT, CS_DEFAULT
};

/* Non-zero unless the file coding system is CS_NOCONV (set by read_profile). */
int japan_environ;

/* private functions */
static void read_profile();
static coding_system_t coding_to_cs();
static coding_system_t select_coding_system();
static void ml_fputs_sbr();
static void jeuc_fputs();
static void sjis_fputs();
static void jis7_fputs();
static void utf8_fputs();
static void cntrl_putc();
static void ml_conv_sbr();

/* NKF Input/Output pointers */
static unsigned char *nkf_output;	/* current output buffer */
static unsigned char *nkf_input;	/* current input buffer */
static int nkf_input_ctr;		/* index of the next byte to read */
static int nkf_output_ctr;		/* number of bytes pending in nkf_output */
static int nkf_limit;			/* capacity of nkf_output, in bytes */
static int nkf_ilimit;			/* input length, or -1 for NUL-terminated */
static void (*nkf_flush)(unsigned char *,int);	/* drains nkf_output */

static void nkf_open(unsigned char *opt, unsigned char *in, int ilimit,
		     unsigned char *out, int limit,
		     void (*flush)(unsigned char *,int), FILE *fp);
static void nkf_continue(unsigned char *in, int ilimit);
static void nkf_end(void);
static void none(unsigned char *a, int b);
static void my_flush(unsigned char *out, int out_count);
static void extend(unsigned char *out, int out_count);
static void my_pretty(unsigned char *out, int out_count);
static FILE *nkf_file;			/* destination used by my_flush/my_pretty */

/*
 * Initialize: holding coding system information
 */
void
ml_init()
{
    read_profile(ml_coding_info);
}

/*
 * Get coding system list from profile and environment variable.
 *
 * For each of the display, file and process slots of csl, the environment
 * variable (MH_DISPLAY_CODING, MH_FILE_CODING, MH_PROCESS_CODING) is tried
 * first, then the profile entry looked up with m_find(), and finally the
 * empty string, which coding_to_cs() maps to CS_DEFAULT.
 * Also sets japan_environ from the file slot of ml_coding_info.
 */
static void
read_profile(csl)
     coding_system_t csl[];
{
    char *s, *default_coding;

#if 0	/* We won't refer $LANG nor $LC_CTYPE */
    if ((default_coding = getenv("LC_CTYPE")) == NULL)
	default_coding = getenv("LANG");
#else
    default_coding = ""; /* for CS_DEFAULT */
#endif

    if ((s = getenv("MH_DISPLAY_CODING")) == NULL)
	if ((s = m_find("display-coding")) == NULL)
	    s = default_coding;
    CSL_DISPLAY(csl) = coding_to_cs(s);

    if ((s = getenv("MH_FILE_CODING")) == NULL)
	if ((s = m_find("file-coding")) == NULL)
	    s = default_coding;
    CSL_FILE(csl) = coding_to_cs(s);

    if ((s = getenv("MH_PROCESS_CODING")) == NULL)
	if ((s = m_find("process-coding")) == NULL)
	    s = default_coding;
    CSL_PROCESS(csl) = coding_to_cs(s);

    /* NOTE(review): reads the global table, not csl; the only caller in
       this file passes ml_coding_info, so the two are the same object. */
    if (CSL_FILE(ml_coding_info) == CS_NOCONV)
	japan_environ = 0;
    else
	japan_environ = 1;
}

/*
 * Map a coding-system name to its CS_* constant.
 * Names are compared with uleq(); the empty string and any unrecognized
 * name yield CS_DEFAULT.
 */
static coding_system_t
coding_to_cs(coding)
     char *coding;
{
    if (*coding == '\0')
	return CS_DEFAULT;
    else if (uleq(coding, "ja_JP.JIS7"))
	return CS_JIS7;
    else if (uleq(coding, "ja_JP.EUC"))
	return CS_JEUC;
    else if (uleq(coding, "ja_JP.EUCjp"))
	return CS_JEUC;
    else if (uleq(coding, "ja_JP.SJIS"))
	return CS_SJIS;
    else if (uleq(coding, "ja_JP.UTF-8"))
	return CS_UTF8;
    else if (uleq(coding, "C"))
	return CS_NOCONV;

    /* for backward compatibility */
    else if (uleq(coding,"japanese") || uleq(coding,"ja_JP.jis8")
	     || uleq(coding,"ja_JP.pjis") || uleq(coding,"ja_JP.jis")
	     || uleq(coding,"wr_WR.ct") || uleq(coding,"wr_WR.junet")) {
	return(CS_JIS7);
    } else if (uleq(coding,"ja_JP.ujis")) {
	return(CS_JEUC);
    } else if (uleq(coding,"ja_JP.mscode")) {
	return(CS_SJIS);
    } else if (uleq(coding,"noconv")) {
	return(CS_NOCONV);
    } else {
	return(CS_DEFAULT);
    }
}

/*
 * Choose the coding system for an output stream from its file type:
 * regular file -> file coding, FIFO (or an all-zero mode) -> process coding,
 * anything else (including character devices) -> display coding.
 * Terminates through adios() when fstat() fails.
 */
static coding_system_t
select_coding_system(stream)
     FILE *stream;
{
    struct stat buf;

    if (fstat(fileno(stream), &buf))
	adios (NULLCP, "unable to fstat stream");
    switch (buf.st_mode & S_IFMT) {
    case S_IFREG:
	return(CSL_FILE(ml_coding_info));
    case S_IFIFO:
    case 0: /* some system returns zero-filled stat for pipe */
	return(CSL_PROCESS(ml_coding_info));
    case S_IFCHR:
    default:
	return(CSL_DISPLAY(ml_coding_info));
    }
}

/*
 * Character classification
 */

/* Non-zero when conversion is enabled and c lies in the IS_JEUC() range. */
int
ml_ismlchar(c)
     unsigned char c;
{
    return japan_environ ? IS_JEUC(c) : 0;
}

/* Non-zero when conversion is enabled and p[0], p[1] are both IS_JEUC().
   p[1] is only examined when p[0] matches. */
int
ml_ismlptr(p)
     unsigned char *p;
{
    return japan_environ ? (IS_JEUC(*p) && IS_JEUC(*(p+1))) : 0;
}

/*
 * Output:
 */

/* Write scanlk to stream in the coding system selected for that stream. */
void
ml_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    ml_fputs_sbr(scanlk, stream, 0);
}

/* Like ml_fputs(), but control characters are written in escaped form
   (see cntrl_putc()). */
void
ml_pretty_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    ml_fputs_sbr(scanlk, stream, 1);
}

/* Write scanlk to stream through jis7_fputs() regardless of stream type. */
void
junet_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    jis7_fputs(scanlk, stream, 0);
}

/* Dispatch to the writer matching the stream's coding system.
   CS_DEFAULT and unknown values are treated like CS_JIS7. */
static void
ml_fputs_sbr(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    switch (select_coding_system(stream)) {
    case CS_NOCONV:
	fputs(scanlk, stream);
	break;
    case CS_SJIS:
	sjis_fputs(scanlk, stream, pretty);
	break;
    case CS_JEUC:
	jeuc_fputs(scanlk, stream, pretty);
	break;
    case CS_UTF8:
	utf8_fputs(scanlk, stream, pretty);
	break;
    case CS_JIS7:
    case CS_DEFAULT:
    default:
	jis7_fputs(scanlk, stream, pretty);
	break;
    }
}

/*
 * Output routines with code conversion
 */

/* Staging buffer handed to nkf by utf8_fputs().
   NOTE(review): has external linkage under a very generic name; kept
   non-static in case other translation units reference it -- confirm. */
char buf[BUFSIZ];

/* Write the NUL-terminated string scanlk to stream via nkf with the
   "-w80" option, staging output in buf. */
static void
utf8_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    nkf_open((unsigned char *)"-w80", (unsigned char *)scanlk, -1,
	     (unsigned char *)buf, BUFSIZ,
	     pretty ? my_pretty : my_flush, stream);
    nkf_end();
}

/*
 * Write scanlk unchanged where possible: IS_JEUC() pairs are copied as is,
 * any other byte with the high bit set becomes a space, and control
 * characters are escaped when pretty is non-zero.
 */
static void
jeuc_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    unsigned char u1, u2;

    while ((u1 = *scanlk++) != '\0') {
	if (IS_JEUC(u1)) {
	    u2 = *scanlk;
	    if (IS_JEUC(u2)) {
		scanlk++;
		putc(u1, stream);
		putc(u2, stream);
		continue;
	    }
	    /* lone lead byte: the follower is left for the next iteration */
	    putc(' ', stream);
	} else if (u1 & 0x80) {
	    putc(' ', stream);
	} else if (pretty && iscntrl(u1)) {
	    cntrl_putc(u1, stream);
	} else {
	    putc(u1, stream);
	}
    }
}

/*
 * Compute the two output bytes (o1, o2) written by sjis_fputs() for the
 * IS_JEUC() byte pair (i1, i2).  Note that i1 and i2 are modified: their
 * high bits are cleared.
 */
#define E2S(i1, i2, o1, o2) do {\
    (i1) &= 0x7f;\
    (i2) &= 0x7f;\
    (o1) = ((i1) - 0x21) / 2 + 0x81;\
    if ((o1) > 0x9f) { (o1) += (0xe0 - 0xa0); }\
    if ((i1) & 1) {\
	(o2) = (i2) + (0x40 - 0x21);\
	if ((o2) > 0x7e) (o2)++;\
    } else {\
	(o2) = (i2) + (0xfc - 0x7e);\
    }\
} while (0)

/*
 * Same scanning rules as jeuc_fputs(), but each IS_JEUC() pair is
 * re-encoded through E2S() before being written.
 */
static void
sjis_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    unsigned char u1, u2, s1, s2;

    while ((u1 = *scanlk++) != '\0') {
	if (IS_JEUC(u1)) {
	    u2 = *scanlk;
	    if (IS_JEUC(u2)) {
		scanlk++;
		E2S(u1, u2, s1, s2);
		putc(s1, stream);
		putc(s2, stream);
		continue;
	    }
	    putc(' ', stream);
	} else if (u1 & 0x80) {
	    putc(' ', stream);
	} else if (pretty && iscntrl(u1)) {
	    cntrl_putc(u1, stream);
	} else {
	    putc(u1, stream);
	}
    }
}

/* Emit ESC $ B and record JISX0208 in status, unless already there. */
#define DSGNT_JISX0208(stream, status) do {\
    if ((status) == ASCII) {\
	fputs("\033$B", (stream)); (status) = JISX0208;\
    }\
} while (0)

/* Emit ESC ( B and record ASCII in status, unless already there. */
#define DSGNT_ASCII(stream, status) do {\
    if ((status) == JISX0208) {\
	fputs("\033(B", (stream)); (status) = ASCII;\
    }\
} while (0)

/*
 * Write scanlk as 7-bit text: IS_JEUC() pairs are written with their high
 * bits cleared between ESC $ B / ESC ( B designations; other bytes follow
 * the same rules as jeuc_fputs().  The stream is always left in the ASCII
 * state on return.
 */
static void
jis7_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    int kanji_pos;	/* ASCII or JISX0208 */
    unsigned char u1, u2;

    kanji_pos = ASCII;
    while ((u1 = *scanlk++) != '\0') {
	if (IS_JEUC(u1)) {
	    u2 = *scanlk;
	    if (IS_JEUC(u2)) {
		scanlk++;
		DSGNT_JISX0208(stream, kanji_pos);
		putc(u1 & 0x7f, stream);
		putc(u2 & 0x7f, stream);
		continue;
	    }
	    DSGNT_ASCII(stream, kanji_pos);
	    putc(' ', stream);
	} else if (u1 & 0x80) {
	    DSGNT_ASCII(stream, kanji_pos);
	    putc(' ', stream);
	} else if (pretty && iscntrl(u1)) {
	    DSGNT_ASCII(stream, kanji_pos);
	    cntrl_putc(u1, stream);
	} else {
	    DSGNT_ASCII(stream, kanji_pos);
	    putc(u1, stream);
	}
    }
    DSGNT_ASCII(stream, kanji_pos);
}

/*
 * Write a control character in visible form: \b \f \n \r \t for the
 * corresponding characters, otherwise '^' followed by c ^ 0x40.
 */
static void
cntrl_putc(c, stream)
     char c;
     FILE *stream;
{
    switch (c) {
    case '\b': putc('\\', stream); putc('b', stream); break;
    case '\f': putc('\\', stream); putc('f', stream); break;
    case '\n': putc('\\', stream); putc('n', stream); break;
    case '\r': putc('\\', stream); putc('r', stream); break;
    case '\t': putc('\\', stream); putc('t', stream); break;
    default:
	putc('^', stream);
	putc(c ^ 0x40, stream);
	break;
    }
}

/*
 * Input:
 */

/*
 * Convert s in place according to the file coding system and return s.
 * s is returned untouched when it is NULL or the file coding system is
 * CS_NOCONV.
 */
char *
ml_conv(s)
     char *s;
{
    coding_system_t coding;

    if ((s == NULL) || ((coding = CSL_FILE(ml_coding_info)) == CS_NOCONV))
	return(s);
    ml_conv_sbr(s, coding, CE_UNKNOWN, 0);
    return(s);
}

/*
 * Like ml_conv(), but also passes a CE_* transfer encoding and the CS_*
 * value of the input charset on to the option selection in cs_input_opt().
 */
char *
ml_conv_decode(s,encode,charset)
     char *s;
     int encode,charset;
{
    coding_system_t coding;

    if ((s == NULL) || ((coding = CSL_FILE(ml_coding_info)) == CS_NOCONV))
	return(s);
    ml_conv_sbr(s, coding, encode, charset);
    return(s);
}

/* Convert to EUC shrinking only (?) */

/* nkf option string for producing output in coding system cs;
   "-t" for CS_NOCONV and for any unlisted value. */
static char *
cs_output_opt(int cs)
{
    switch (cs) {
    case CS_JIS7: return "-j";
    case CS_JEUC: return "-e";
    case CS_SJIS: return "-s";
    case CS_UTF8: return "-w80";
    case CS_NOCONV: return "-t";
    }
    return "-t";
}

/*
 * nkf option string for reading input.  A base64 or quoted-printable
 * transfer encoding takes precedence (with a distinct option string when
 * the input charset is CS_UTF8); otherwise the string is chosen from cs,
 * falling back to "-e".
 */
static char *
cs_input_opt(int cs, int encode, int input_charset)
{
    switch (encode) {
    case CE_BASE64:
	if (input_charset == CS_UTF8) return "-emBW8";
	return "-emB";
    case CE_QUOTED:
	if (input_charset == CS_UTF8) return "-emQW8";
	return "-emQ";
    }
    switch (cs) {
    case CS_JIS7: return "-Je";
    case CS_JEUC: return "-Ee";
    case CS_SJIS: return "-Se";
    case CS_UTF8: return "-W8e";
    case CS_NOCONV: return "-t";
    }
    return "-e";
}

/*
 * Run nkf over the NUL-terminated string `in`, writing the result back
 * over the same buffer and NUL-terminating it.
 * NOTE(review): the output pointer is never bounded against the end of the
 * caller's buffer, so a conversion whose result is longer than the input
 * would write past it -- confirm that every option used here only shrinks.
 */
static void
ml_conv_sbr(in, cs, encode, input_charset)
     char *in;
     coding_system_t cs;
     int encode, input_charset;
{
    char *opt = cs_input_opt(cs, encode, input_charset);
    int len = strlen(in);

    nkf_open((unsigned char *)opt, (unsigned char *)in, len,
	     (unsigned char *)in, len, extend, NULL);
    nkf_end();
    /* nkf_end() has advanced nkf_output past the last byte written */
    nkf_output[nkf_output_ctr] = 0;
}

/*
 * Run nkf with the "-me" option over ptr in place and NUL-terminate the
 * result.  The same in-place caveat as ml_conv_sbr() applies.
 */
extern void
mime_convert(char *ptr)
{
    int len = strlen(ptr);

    nkf_open((unsigned char *)"-me", (unsigned char *)ptr, len,
	     (unsigned char *)ptr, len, extend, NULL);
    nkf_end();
    nkf_output[nkf_output_ctr] = 0;
}

/*
 * Glue for the nkf sources included below: their getc/ungetc/putchar
 * calls are redirected to the memory buffers set up by nkf_open().
 */
#undef CR
#undef LIMIT
#undef PROTO
#undef getc
#undef ungetc

#define getc(f)		nkf_getc(f)
#define ungetc(c,f)	nkf_ungetc(c,f)

/*
 * Return the next input byte, or -1 at end of input.  With nkf_ilimit == -1
 * the input ends at the first NUL byte (which is not consumed); otherwise
 * it ends after nkf_ilimit bytes.  The FILE argument is ignored.
 */
static int
nkf_getc(FILE *f)
{
    if (nkf_ilimit == -1) {
	int c = nkf_input[nkf_input_ctr++];
	if (c == 0) {
	    nkf_input_ctr--;
	    return -1;
	}
	return c;
    }
    return (nkf_input_ctr >= nkf_ilimit ? -1 : nkf_input[nkf_input_ctr++]);
}

/*
 * Step the read position back by one byte and return c.
 * Pushing back the end-of-input value is a no-op, as with stdio ungetc():
 * nkf_getc() does not advance when it reports end of input, so stepping
 * back there would replay the last byte.  The index never goes below zero.
 */
static int
nkf_ungetc(int c, FILE *f)
{
    if (c >= 0 && nkf_input_ctr > 0)
	nkf_input_ctr--;
    return c;
}

#undef putchar
#undef TRUE
#undef FALSE
#define putchar(c)	nkf_putchar(c)
#define debug nkf_debug

/*
 * Append one byte to the output buffer and return the stored byte.
 * The buffer is handed to nkf_flush when it is full and after every
 * newline.  A full buffer is flushed BEFORE the byte is stored, so index
 * nkf_limit is never written while bytes are pending (the previous code
 * stored first and so wrote one byte past a buffer of nkf_limit bytes).
 */
static int
nkf_putchar(unsigned int c)
{
    if (nkf_output_ctr >= nkf_limit && nkf_output_ctr > 0) {
	nkf_flush(nkf_output, nkf_output_ctr);
	nkf_output_ctr = 0;
    }
    nkf_output[nkf_output_ctr++] = c;
    if (c == '\n') {
	nkf_flush(nkf_output, nkf_output_ctr);
	nkf_output_ctr = 0;
    }
    return (unsigned char)c;
}

/* Include kanji filter main part */
/* getchar and putchar will be replaced during inclusion */

#define version NKF_version
#define PERL_XS 1
#include "../nkf-utf8/utf8tbl.c"
#include "../nkf-utf8/nkf.c"

/*
 * Using opt ("-w8" etc.), convert *in into *out.
 * flush is called whenever the output buffer fills, after each newline,
 * and from nkf_end().  nkf_continue() can be used to change the input
 * pointer.
 *
 *   opt     option string passed to options()
 *   in      input bytes
 *   ilimit  number of input bytes, or -1 when `in` is NUL-terminated
 *   out     output buffer
 *   limit   capacity of out
 *   flush   callback that drains out
 *   fp      stream used by my_flush/my_pretty; may be NULL for other
 *           callbacks
 *
 * reinit(), options() and kanji_convert() come from the included nkf
 * sources.
 */
static void
nkf_open(unsigned char *opt, unsigned char *in, int ilimit,
	 unsigned char *out, int limit,
	 void (*flush)(unsigned char *,int), FILE *fp)
{
    /* Flags are reset at each call. */
    reinit();

    /* Process flags except the last once */
    options(opt);

    nkf_input_ctr = 0;
    nkf_input = in;
    nkf_ilimit = ilimit;

    nkf_file = fp;

    nkf_output_ctr = 0;
    nkf_output = out;
    nkf_limit = limit;
    nkf_flush = flush;

    /* Conversion */
    kanji_convert(NULL);
}

/* Feed another input buffer through the conversion set up by nkf_open();
   output state is left as it is. */
static void
nkf_continue(unsigned char *in, int ilimit)
{
    nkf_input_ctr = 0;
    nkf_input = in;
    nkf_ilimit = ilimit;

    /* Conversion */
    kanji_convert(NULL);
}

/* Flush whatever is still pending in the output buffer. */
static void
nkf_end(void)
{
    if (nkf_output_ctr) {
	nkf_flush(nkf_output, nkf_output_ctr);
	nkf_output_ctr = 0;
    }
}

/*
 * Flush callback for in-place conversion: instead of draining the buffer,
 * move the output pointer past the pending bytes so they are kept.
 * Uses the global counter; both arguments are ignored.
 */
static void
extend(unsigned char *out, int out_count)
{
    nkf_output += nkf_output_ctr;
}

/* Flush callback that discards its input. */
static void
none(unsigned char *a, int b)
{
}

/* Flush callback: write out_count bytes verbatim to nkf_file. */
static void
my_flush(unsigned char *out, int out_count)
{
    if (out_count > 0)
	fwrite(out, 1, (size_t)out_count, nkf_file);
}

/* Flush callback: write out_count bytes to nkf_file, escaping control
   characters through cntrl_putc(). */
static void
my_pretty(unsigned char *out, int out_count)
{
    int c;
    FILE *fp = nkf_file;

    while (out_count-- > 0) {
	c = *out++;
	if (iscntrl(c))
	    cntrl_putc(c, fp);
	else
	    putc(c, fp);
    }
}

#endif /* JAPAN */

/* end */