Summary
Ruby extension for charset conversion.
Abstract
Iconv is a wrapper class for the UNIX 95
iconv() function family, which translates strings between
various encoding systems.
See Open Group's on-line documents for more details.
-
iconv.h: www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html -
iconv_open(): www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html -
iconv(): www.opengroup.org/onlinepubs/007908799/xsh/iconv.html -
iconv_close(): www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
Which coding systems are available is platform-dependent.
Examples
-
Simple conversion between two charsets.
converted_text = Iconv.conv('iso-8859-15', 'utf-8', text) -
Instantiate a new Iconv and use method #iconv.
cd = Iconv.new(to, from) begin input.each { |s| output << cd.iconv(s) } output << cd.iconv(nil) # Don't forget this! ensure cd.close end -
Invoke ::open with a block.
Iconv.open(to, from) do |cd| input.each { |s| output << cd.iconv(s) } output << cd.iconv(nil) end -
Shorthand for the previous example (the block form of ::open).
Iconv.iconv(to, from, *input.to_a)
Attentions
Even if some implementation-dependent extensions are useful, DON'T USE those extensions in libraries and scripts that are meant to be widely distributed. If you want to use those features, use String#encode.
- MODULE Iconv::Failure
- CLASS Iconv::BrokenLibrary
- CLASS Iconv::IllegalSequence
- CLASS Iconv::InvalidCharacter
- CLASS Iconv::InvalidEncoding
- CLASS Iconv::OutOfRange
- C
- D
- I
- L
- N
- O
- T
Returns the map from canonical name to system dependent name.
Source: show
/*
 * Returns the charset_map value (defined outside this function) as-is.
 * Takes no arguments; nothing is copied or converted here.
 */
static VALUE
charset_map_get(void)
{
return charset_map;
}
Source: show
/*
 * Iconv.conv(to, from, str)
 *
 * One-shot conversion of a single value. Builds an iconv_env_t that
 * describes one input (str), an initially empty result string and
 * rb_str_append as the accumulator, then runs iconv_s_convert under
 * rb_ensure so that iconv_free is always called on the descriptor
 * returned by iconv_create.
 *
 * self - receiver (not used in this function)
 * to   - destination encoding, passed to iconv_create
 * from - source encoding, passed to iconv_create
 * str  - the value to convert
 *
 * Returns whatever iconv_s_convert returns.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;

    /* Exactly one input argument: the address of our own parameter. */
    env.argv = &str;
    env.argc = 1;

    /* Results are appended onto a fresh empty string. */
    env.append = rb_str_append;
    env.ret = rb_str_new(0, 0);

    /* No options are passed (NULL); toidx is filled in by iconv_create. */
    env.cd = iconv_create(to, from, NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env,
                     iconv_free, ICONV2VALUE(env.cd));
}
Returns available iconvctl() method list.
Source: show
/*
 * Builds and returns a new Array of Symbols naming the iconvctl-backed
 * methods. Each Symbol is included only when the matching ICONV_* macro
 * is defined at compile time, so the result may be empty.
 *
 * klass - receiver (not used in this function)
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE methods = rb_ary_new();

#ifdef ICONV_TRIVIALP
    rb_ary_push(methods, ID2SYM(rb_intern("trivial?")));
#endif

    /* transliterate getter / setter */
#ifdef ICONV_GET_TRANSLITERATE
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate?")));
#endif
#ifdef ICONV_SET_TRANSLITERATE
    rb_ary_push(methods, ID2SYM(rb_intern("transliterate=")));
#endif

    /* discard_ilseq getter / setter */
#ifdef ICONV_GET_DISCARD_ILSEQ
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq?")));
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
    rb_ary_push(methods, ID2SYM(rb_intern("discard_ilseq=")));
#endif

    return methods;
}
Shorthand for
Iconv.open(to, from) { |cd|
(strs + [nil]).collect { |s| cd.iconv(s) }
}
Parameters
to, from-
see ::new
strs-
strings to be converted
Exceptions
Source: show
/*
 * Iconv.iconv(to, from, *strs)
 *
 * Converts every argument after the first two. argv[0] and argv[1] are
 * handed to iconv_create as the `to' and `from' encodings; the remaining
 * arguments become the inputs of iconv_s_convert, whose results are
 * collected with rb_ary_push into a new Array.
 *
 * Raises ArgumentError when fewer than two arguments are given.
 * iconv_free is always run on the descriptor via rb_ensure.
 *
 * self - receiver (not used in this function)
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;
    int nstrs;

    /* needs `to' and `from' arguments at least */
    if (argc < 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    /* Everything past the two encoding names is input to convert. */
    nstrs = argc - 2;
    env.argc = nstrs;
    env.argv = &argv[2];

    /* Results are pushed onto an Array pre-sized for the inputs. */
    env.append = rb_ary_push;
    env.ret = rb_ary_new2(nstrs);

    /* No options are passed (NULL); toidx is filled in by iconv_create. */
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env,
                     iconv_free, ICONV2VALUE(env.cd));
}
Iterates over each alias set.
Source: show
/*
 * Iconv.list
 *
 * Enumerates the encodings known to the underlying iconv library.
 *
 * With HAVE_ICONVLIST: calls iconvlist() with the list_iconv callback.
 *   args[1] is 0 when a block is given, otherwise a new Array that is
 *   returned at the end. args[0] doubles as an int status slot: it is
 *   read back after iconvlist() and, when non-zero, passed to
 *   rb_jump_tag().
 *   NOTE(review): list_iconv is defined elsewhere; the read-back below
 *   implies it stores the status at the start of args -- confirm.
 *
 * With HAVE___ICONV_FREE_LIST: fetches the list with __iconv_get_list(),
 *   copies each name into a Ruby String, frees the C list, and then either
 *   returns the Array (no block) or yields each element (block given).
 *
 * Returns nil when a block is given, when __iconv_get_list() reports
 * failure, or when neither facility is available.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * Zero the status slot up front. Without this, if the callback is
     * never invoked, the read below would pick up an indeterminate value
     * and could hand garbage to rb_jump_tag().
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* Copy the names into Ruby objects before releasing the C list. */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
Creates a new code converter from the coding system designated by
from to the one designated by to.
Parameters
to-
encoding name for destination
from-
encoding name for source
options-
options for converter
Exceptions
- TypeError
-
if
to or from aren't String - InvalidEncoding
-
if the designated converter couldn't be found
- SystemCallError
-
if
iconv_open(3) fails
Source: show
/*
 * Iconv#initialize(to, from[, options])
 *
 * Parses two required and one optional argument, frees whatever
 * descriptor the receiver currently holds, and installs a new one
 * obtained from iconv_create. When iconv_create reports a non-negative
 * encoding index, that index is recorded on the receiver with
 * ENCODING_SET.
 *
 * Returns self.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE handle;
    struct rb_iconv_opt_t conv_opt;
    int enc_index;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&conv_opt, options);

    /*
     * Drop the old descriptor and clear the slot before creating the new
     * one, so the receiver never keeps pointing at a freed descriptor
     * should iconv_create not return normally.
     */
    iconv_free(check_iconv(self));
    DATA_PTR(self) = NULL;

    handle = ICONV2VALUE(iconv_create(to, from, &conv_opt, &enc_index));
    DATA_PTR(self) = (void *)handle;

    if (enc_index >= 0) {
        ENCODING_SET(self, enc_index);
    }
    return self;
}
Equivalent to ::new except that, when it is called with a block, it yields the new instance to the block, closes it afterwards, and returns the value returned by the block.
Source: show
/*
 * Iconv.open(to, from[, options]) [{ |cd| ... }]
 *
 * Creates a descriptor with iconv_create, wraps it in a new data object
 * of the receiving class (freed with ICONV_FREE), and records the
 * encoding index on it when that index is non-negative.
 *
 * Without a block the wrapped object is returned. With a block the
 * object is yielded and iconv_finish is run on it afterwards via
 * rb_ensure; the result of rb_ensure is returned.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE handle, obj;
    struct rb_iconv_opt_t conv_opt;
    int enc_index;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&conv_opt, options);

    handle = ICONV2VALUE(iconv_create(to, from, &conv_opt, &enc_index));
    obj = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)handle);
    if (enc_index >= 0) {
        ENCODING_SET(obj, enc_index);
    }

    if (!rb_block_given_p()) {
        return obj;
    }

    /* Block form: always finish the converter once the block is done. */
    return rb_ensure(rb_yield, obj, (VALUE(*)())iconv_finish, obj);
}
Finishes conversion.
After calling this, calling #iconv will cause an exception, but multiple calls of close are guaranteed to end successfully.
Returns a string containing the byte sequence to change the output buffer to its initial shift state.
Source: show
/*
 * Iconv#close
 *
 * Looks up the receiver's descriptor with check_iconv. If there is none
 * (a zero value), returns nil without doing anything. Otherwise runs
 * iconv_init_state on the receiver and, via rb_ensure, always calls
 * iconv_free on the descriptor afterwards; the result of rb_ensure is
 * returned.
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE handle = check_iconv(self);

    if (handle) {
        return rb_ensure(iconv_init_state, self, iconv_free, handle);
    }
    return Qnil;
}
Equivalent to
iconv(nil, str..., nil).join
Source: show
/*
 * Iconv#conv(str...)
 *
 * Calls iconv_convert once with nil to obtain the initial result string,
 * then once per argument (start 0, length -1), appending each non-empty
 * piece to the result. If at least one argument was given, a final
 * iconv_convert call with nil is made and its non-empty output is
 * appended as well.
 *
 * Returns the accumulated string.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    VALUE result, piece;
    int enc_index = ENCODING_GET(self);
    int i;

    /* Leading nil call: its return value seeds the result. */
    result = iconv_convert(cd, Qnil, 0, 0, enc_index, NULL);
    if (argc <= 0) {
        return result;
    }

    for (i = 0; i < argc; i++) {
        piece = iconv_convert(cd, argv[i], 0, -1, enc_index, NULL);
        if (RSTRING_LEN(piece)) {
            rb_str_buf_append(result, piece);
        }
    }

    /* Trailing nil call, made only when there was input. */
    piece = iconv_convert(cd, Qnil, 0, 0, enc_index, NULL);
    if (RSTRING_LEN(piece)) {
        rb_str_buf_append(result, piece);
    }
    return result;
}
Sets discard_ilseq flag.
Source: show
/*
 * Iconv#discard_ilseq=(flag)
 *
 * Reduces the argument to its truthiness with RTEST and hands it to
 * iconv_ctl with the ICONV_SET_DISCARD_ILSEQ request.
 *
 * Returns self.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    /* Kept in a named int variable, which is what iconv_ctl receives. */
    int flag = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, flag);
    return self;
}
Returns discard_ilseq flag.
Source: show
/*
 * Iconv#discard_ilseq?
 *
 * Queries the flag through iconv_ctl with the ICONV_GET_DISCARD_ILSEQ
 * request and maps a non-zero result to true, zero to false.
 *
 * NOTE(review): iconv_ctl is defined elsewhere; it is presumably a macro
 * that takes the address of its third argument, since `flag' is not
 * otherwise written here -- confirm.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, flag);
    return flag ? Qtrue : Qfalse;
}
Converts string and returns the result.
-
If
str is a String, converts str[start, length] and returns the converted string. -
If
str is nil, places the converter itself into its initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state. -
Otherwise, raises an exception.
Parameters
- str
-
string to be converted, or nil
- start
-
starting offset
- length
-
conversion length; nil or -1 means the whole string from start
Exceptions
-
IconvIllegalSequence
-
IconvInvalidCharacter
-
IconvOutOfRange
Examples
See the Iconv documentation.
Source: show
/*
 * Iconv#iconv(str, start = 0, length = -1)
 *
 * Converts str (or part of it) with the receiver's descriptor.
 *
 * str    - a String (coerced with StringValue) or nil
 * start  - optional start offset; when exactly two arguments are given
 *          it is first tried as a Range through rb_range_beg_len
 * length - optional length; nil is mapped to -1
 *
 * start and length are given in characters. Positive values are
 * translated to byte offsets with rb_enc_nth before iconv_convert is
 * called. Returns the result of iconv_convert.
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
    VALUE str, n1, n2;
    VALUE cd = check_iconv(self);
    long start = 0, length = 0, slen = 0;

    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
    if (!NIL_P(str)) {
        VALUE n = rb_str_length(StringValue(str));
        slen = NUM2LONG(n);
    }

    /*
     * Two-argument form: try n1 as a Range first. Otherwise (or when that
     * does not apply) treat n1/n2 as start/length; a negative start counts
     * from the end. length is only taken from n2 when start lands inside
     * the string (or n1 is nil); otherwise it stays 0.
     */
    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
        if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
            length = NIL_P(n2) ? -1 : NUM2LONG(n2);
        }
    }

    /*
     * Character -> byte offset translation. This must be skipped for a nil
     * str: nil is not a String, so rb_enc_get/RSTRING_PTR/RSTRING_LEN must
     * not be applied to it (previously reachable with e.g.
     * iconv(nil, nil, 5), where length > 0).
     */
    if (!NIL_P(str) && (start > 0 || length > 0)) {
        rb_encoding *enc = rb_enc_get(str);
        const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
        const char *ps = s;
        if (start > 0) {
            start = (ps = rb_enc_nth(s, e, start, enc)) - s;
        }
        if (length > 0) {
            length = rb_enc_nth(ps, e, length, enc) - ps;
        }
    }
    return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
}
Sets transliterate flag.
Source: show
/*
 * Iconv#transliterate=(flag)
 *
 * Reduces the argument to its truthiness with RTEST and hands it to
 * iconv_ctl with the ICONV_SET_TRANSLITERATE request.
 *
 * Returns self.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    /* Kept in a named int variable, which is what iconv_ctl receives. */
    int flag = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, flag);
    return self;
}