Patch for ext/syck/emitter.c (r1.16), ext/syck/syck.h (r1.30) and test/yaml/test_yaml.rb (r1.18): emit well-formed 2- and 3-byte UTF-8 sequences unescaped in double-quoted YAML scalars.
Index: ext/syck/emitter.c
===================================================================
RCS file: /src/ruby/ext/syck/emitter.c,v
retrieving revision 1.16
diff -u -b -r1.16 emitter.c
--- ext/syck/emitter.c 2 Feb 2006 15:02:49 -0000 1.16
+++ ext/syck/emitter.c 4 Jun 2006 19:53:26 -0000
@@ -776,25 +776,56 @@
syck_emitter_escape( SyckEmitter *e, char *src, long len )
{
int i;
+ unsigned int c1, c2, c3; /* current byte and up to two look-ahead bytes */
for( i = 0; i < len; i++ )
{
- if( (src[i] < 0x20) || (0x7E < src[i]) )
+ /* Read bytes as unsigned char: plain char may be signed, which would
+ * sign-extend 0x80..0xFF and defeat every UTF-8 range test below.
+ * Look-ahead past the end reads as 0, which is never a continuation byte. */
+ c1 = (unsigned char)src[i];
+ c2 = (i + 1 < len) ? (unsigned char)src[i+1] : 0;
+ c3 = (i + 2 < len) ? (unsigned char)src[i+2] : 0;
+ if( (0x20 <= c1) && (c1 <= 0x7E) ) /* printable ASCII: copy, doubling a backslash */
{
+ syck_emitter_write( e, src + i, 1 );
+ if( '\\' == c1 )
syck_emitter_write( e, "\\", 1 );
- if( '\0' == src[i] )
- syck_emitter_write( e, "0", 1 );
- else
- {
- syck_emitter_write( e, "x", 1 );
- syck_emitter_write( e, (char *)hex_table + ((src[i] & 0xF0) >> 4), 1 );
- syck_emitter_write( e, (char *)hex_table + (src[i] & 0x0F), 1 );
}
+ else if ( ((i + 1) < len) && /* UTF-8 - 2 bytes */
+ (0xc2 <= c1) && (c1 <= 0xdf) &&
+ (0x80 <= c2) && (c2 <= 0xbf) )
+ {
+ syck_emitter_write( e, src + i, 2 );
+ i++;
+ }
+ else if ( ((i + 2) < len) && /* UTF-8 - 3 bytes; the length guard must cover all four lead-byte cases */
+ ( ( (0xe0 == c1) && /* E0: second byte A0..BF only (rejects overlong forms) */
+ (0xa0 <= c2) && (c2 <= 0xbf) &&
+ (0x80 <= c3) && (c3 <= 0xbf) ) ||
+ ( (0xe1 <= c1) && (c1 <= 0xec) &&
+ (0x80 <= c2) && (c2 <= 0xbf) &&
+ (0x80 <= c3) && (c3 <= 0xbf) ) ||
+ ( (0xed == c1) && /* ED: second byte 80..9F only (rejects UTF-16 surrogates) */
+ (0x80 <= c2) && (c2 <= 0x9f) &&
+ (0x80 <= c3) && (c3 <= 0xbf) ) ||
+ ( (0xee <= c1) && (c1 <= 0xef) &&
+ (0x80 <= c2) && (c2 <= 0xbf) &&
+ (0x80 <= c3) && (c3 <= 0xbf) ) ) )
+ {
+ syck_emitter_write( e, src + i, 3 );
+ i += 2;
}
else
{
- syck_emitter_write( e, src + i, 1 );
- if( '\\' == src[i] )
syck_emitter_write( e, "\\", 1 );
+ if( '\0' == c1 )
+ syck_emitter_write( e, "0", 1 );
+ else
+ {
+ syck_emitter_write( e, "x", 1 );
+ syck_emitter_write( e, (char *)hex_table + ((c1 & 0xF0) >> 4), 1 );
+ syck_emitter_write( e, (char *)hex_table + (c1 & 0x0F), 1 );
+ }
}
}
}
@@ -849,12 +880,13 @@
/*
* Outputs a double-quoted block.
*/
-void syck_emit_2quoted( SyckEmitter *e, int width, char *str, long len )
+void syck_emit_2quoted( SyckEmitter *e, int width, unsigned char *str, long len )
{
char do_indent = 0;
- char *mark = str;
- char *start = str;
- char *end = str;
+ unsigned char *mark = str;
+ unsigned char *start = str;
+ unsigned char *end = str;
+ unsigned int c, c2 = 0, c3 = 0; /* look-ahead bytes start at 0 so they are never read while indeterminate */
syck_emitter_write( e, "\"", 1 );
while ( mark < str + len ) {
if ( do_indent > 0 ) {
@@ -864,21 +896,34 @@
syck_emit_indent( e );
do_indent = 0;
}
- switch ( *mark ) {
-
- /* Escape sequences allowed within double quotes. */
- case '"': syck_emitter_write( e, "\\\"", 2 ); break;
- case '\\': syck_emitter_write( e, "\\\\", 2 ); break;
- case '\0': syck_emitter_write( e, "\\0", 2 ); break;
- case '\a': syck_emitter_write( e, "\\a", 2 ); break;
- case '\b': syck_emitter_write( e, "\\b", 2 ); break;
- case '\f': syck_emitter_write( e, "\\f", 2 ); break;
- case '\r': syck_emitter_write( e, "\\r", 2 ); break;
- case '\t': syck_emitter_write( e, "\\t", 2 ); break;
- case '\v': syck_emitter_write( e, "\\v", 2 ); break;
- case 0x1b: syck_emitter_write( e, "\\e", 2 ); break;
- case '\n':
+ c = *mark;
+ /* Look-ahead bytes for UTF-8 detection. Past the end they read as 0, which
+ * fails every continuation-byte test, so a stale value is never used. */
+ c2 = ( mark + 1 < str + len ) ? *(mark + 1) : 0;
+ c3 = ( mark + 2 < str + len ) ? *(mark + 2) : 0;
+ if (c == '"') /* Escape sequences allowed within double quotes. */
+ syck_emitter_write( e, "\\\"", 2 );
+ else if (c == '\\')
+ syck_emitter_write( e, "\\\\", 2 );
+ else if (c == '\0')
+ syck_emitter_write( e, "\\0", 2 );
+ else if (c == '\a')
+ syck_emitter_write( e, "\\a", 2 );
+ else if (c == '\b')
+ syck_emitter_write( e, "\\b", 2 );
+ else if (c == '\f')
+ syck_emitter_write( e, "\\f", 2 );
+ else if (c == '\r')
+ syck_emitter_write( e, "\\r", 2 );
+ else if (c == '\t')
+ syck_emitter_write( e, "\\t", 2 );
+ else if (c == '\v')
+ syck_emitter_write( e, "\\v", 2 );
+ else if (c == 0x1b)
+ syck_emitter_write( e, "\\e", 2 );
+ else if (c == '\n')
+ {
end = mark + 1;
syck_emitter_write( e, "\\n", 2 );
do_indent = 2;
@@ -886,20 +931,47 @@
if ( start < str + len && ( *start == ' ' || *start == '\n' ) ) {
do_indent = 0;
}
- break;
-
- case ' ':
+ }
+ else if (c == ' ')
+ {
if ( width > 0 && *start != ' ' && mark - end > width ) {
do_indent = 1;
end = mark + 1;
} else {
syck_emitter_write( e, " ", 1 );
}
- break;
-
- default:
+ }
+ else if ( (0x20 <= c) && (c <= 0x7E) ) /* remaining printable ASCII: copy as-is */
+ {
+ syck_emitter_write( e, (char *)mark, 1 );
+ }
+ else if ( (mark + 1) < (str + len) && /* UTF-8 - 2 bytes */
+ (0xC2 <= c) && (c <= 0xDF) &&
+ (0x80 <= c2) && (c2 <= 0xBF) )
+ {
+ syck_emitter_write( e, (char *)mark, 2 );
+ mark++;
+ }
+ else if ( (mark + 2) < (str + len) && /* UTF-8 - 3 bytes; the length guard must cover all four lead-byte cases */
+ ( ( (0xE0 == c) && /* E0: second byte A0..BF only (rejects overlong forms) */
+ (0xA0 <= c2) && (c2 <= 0xBF) &&
+ (0x80 <= c3) && (c3 <= 0xBF) ) ||
+ ( (0xE1 <= c) && (c <= 0xEC) &&
+ (0x80 <= c2) && (c2 <= 0xBF) &&
+ (0x80 <= c3) && (c3 <= 0xBF) ) ||
+ ( (0xED == c) && /* ED: second byte 80..9F only (rejects UTF-16 surrogates) */
+ (0x80 <= c2) && (c2 <= 0x9F) &&
+ (0x80 <= c3) && (c3 <= 0xBF) ) ||
+ ( (0xEE <= c) && (c <= 0xEF) &&
+ (0x80 <= c2) && (c2 <= 0xBF) &&
+ (0x80 <= c3) && (c3 <= 0xBF) ) ) )
+ {
+ syck_emitter_write( e, (char *)mark, 3 );
+ mark += 2;
+ }
+ else
+ {
- syck_emitter_escape( e, mark, 1 );
- break;
+ syck_emitter_escape( e, (char *)mark, 1 ); /* anything else is backslash-escaped byte by byte */
}
mark++;
}
Index: ext/syck/syck.h
===================================================================
RCS file: /src/ruby/ext/syck/syck.h,v
retrieving revision 1.30
diff -u -b -r1.30 syck.h
--- ext/syck/syck.h 19 Dec 2005 14:13:27 -0000 1.30
+++ ext/syck/syck.h 4 Jun 2006 19:53:26 -0000
@@ -376,7 +376,7 @@
void syck_emit( SyckEmitter *, st_data_t );
void syck_emit_scalar( SyckEmitter *, char *, enum scalar_style, int, int, char, char *, long );
void syck_emit_1quoted( SyckEmitter *, int, char *, long );
-void syck_emit_2quoted( SyckEmitter *, int, char *, long );
+void syck_emit_2quoted( SyckEmitter *, int, unsigned char *, long ); /* unsigned bytes: the emitter range-tests them for UTF-8 sequences */
void syck_emit_folded( SyckEmitter *, int, char, char *, long );
void syck_emit_literal( SyckEmitter *, char, char *, long );
void syck_emit_seq( SyckEmitter *, char *, enum seq_style );
Index: test/yaml/test_yaml.rb
===================================================================
RCS file: /src/ruby/test/yaml/test_yaml.rb,v
retrieving revision 1.18
diff -u -b -r1.18 test_yaml.rb
--- test/yaml/test_yaml.rb 16 Jan 2006 01:28:52 -0000 1.18
+++ test/yaml/test_yaml.rb 4 Jun 2006 19:53:27 -0000
@@ -1272,6 +1272,11 @@
assert_equal([{}], o.keys)
end
+ def test_toyaml_rawutf8_string
+ # Japanese Hiragana "a i u e o": 3-byte UTF-8 characters must be emitted raw, not as \x escapes
+ assert_equal("--- \"abcdefghijklmnopqrstuvwxyzあいうえお\"\n",
+ "abcdefghijklmnopqrstuvwxyzあいうえお".to_yaml)
+ end
end
if $0 == __FILE__