1208 lines
44 KiB
Diff
1208 lines
44 KiB
Diff
From de29341638833ba7717bd6b5e6850998454b044b Mon Sep 17 00:00:00 2001
|
|
From: Kevin Atkinson <kevina@gnu.org>
|
|
Date: Sat, 17 Aug 2019 17:06:53 -0400
|
|
Subject: [PATCH] Don't allow null-terminated UCS-2/4 strings using the
|
|
original API.
|
|
|
|
Detect if the encoding is UCS-2/4 and the length is -1 in affected API
|
|
functions and refuse to convert the string. If the string ends up
|
|
being converted somehow, abort with an error message in DecodeDirect
|
|
and ConvDirect. To convert a null terminated string in
|
|
Decode/ConvDirect, a negative number corresponding to the width of the
|
|
underlying character type for the encoding is expected; for example,
|
|
if the encoding is "ucs-2" then the size is expected to be -2.
|
|
|
|
Also fix a 1-3 byte over-read in DecodeDirect when reading UCS-2/4
|
|
strings when a size is provided (found by OSS-Fuzz).
|
|
|
|
Also fix a bug in DecodeDirect that caused DocumentChecker to return
|
|
the wrong offsets when working with UCS-2/4 strings.
|
|
---
|
|
auto/MkSrc/CcHelper.pm | 99 ++++++++++++++++++++++++++++++++++---
|
|
auto/MkSrc/Create.pm | 5 +-
|
|
auto/MkSrc/Info.pm | 5 +-
|
|
auto/MkSrc/ProcCc.pm | 24 +++++----
|
|
auto/MkSrc/ProcImpl.pm | 57 +++++++++++++++------
|
|
auto/MkSrc/Read.pm | 4 +-
|
|
auto/mk-src.in | 44 +++++++++++++++--
|
|
common/convert.cpp | 39 ++++++++++++---
|
|
common/convert.hpp | 38 +++++++++++++-
|
|
common/document_checker.cpp | 17 ++++++-
|
|
common/document_checker.hpp | 1 +
|
|
configure.ac | 8 +++
|
|
manual/aspell.texi | 58 ++++++++++++++++------
|
|
manual/readme.texi | 67 +++++++++++++++++++++----
|
|
test/cxx_warnings_test.cpp | 84 +++++++++++++++++++++++++++++++
|
|
test/wide_test_invalid.c | 69 ++++++++++++++++++++++++++
|
|
test/wide_test_valid.c | 69 ++++++++++++++++++++++++++
|
|
17 files changed, 618 insertions(+), 70 deletions(-)
|
|
create mode 100644 test/cxx_warnings_test.cpp
|
|
create mode 100644 test/wide_test_invalid.c
|
|
create mode 100644 test/wide_test_valid.c
|
|
|
|
diff --git a/auto/MkSrc/CcHelper.pm b/auto/MkSrc/CcHelper.pm
|
|
index e814a1a..9a96041 100644
|
|
--- a/auto/MkSrc/CcHelper.pm
|
|
+++ b/auto/MkSrc/CcHelper.pm
|
|
@@ -10,8 +10,8 @@ BEGIN {
|
|
use Exporter;
|
|
our @ISA = qw(Exporter);
|
|
our @EXPORT = qw(to_c_return_type c_error_cond
|
|
- to_type_name make_desc make_func call_func
|
|
- make_c_method call_c_method form_c_method
|
|
+ to_type_name make_desc make_func call_func get_c_func_name
|
|
+ make_c_method make_wide_macro call_c_method form_c_method
|
|
make_cxx_method);
|
|
}
|
|
|
|
@@ -90,6 +90,69 @@ sub make_func ( $ \@ $ ; \% ) {
|
|
')'));
|
|
}
|
|
|
|
+=item make_wide_version NAME @TYPES PARMS ; %ACCUM
|
|
+
|
|
+Creates the wide character version of the function if needed
|
|
+
|
|
+=cut
|
|
+
|
|
+sub make_wide_version ( $ \@ $ ; \% ) {
|
|
+ my ($name, $d, $p, $accum) = @_;
|
|
+ my @d = @$d;
|
|
+ shift @d;
|
|
+ return '' unless grep {$_->{type} eq 'encoded string'} @d;
|
|
+ $accum->{sys_headers}{'stddef.h'} = true;
|
|
+ $accum->{suffix}[5] = <<'---';
|
|
+
|
|
+/******************* private implemantion details *********************/
|
|
+
|
|
+#ifdef __cplusplus
|
|
+# define aspell_cast_(type, expr) (static_cast<type>(expr))
|
|
+# define aspell_cast_from_wide_(str) (static_cast<const void *>(str))
|
|
+#else
|
|
+# define aspell_cast_(type, expr) ((type)(expr))
|
|
+# define aspell_cast_from_wide_(str) ((const char *)(str))
|
|
+#endif
|
|
+---
|
|
+ my @parms = map {$_->{type} eq 'encoded string'
|
|
+ ? ($_->{name}, $_->{name}.'_size')
|
|
+ : $_->{name}} @d;
|
|
+ $name = to_lower $name;
|
|
+ $accum->{suffix}[0] = <<'---';
|
|
+/**********************************************************************/
|
|
+
|
|
+#ifdef ASPELL_ENCODE_SETTING_SECURE
|
|
+---
|
|
+ $accum->{suffix}[2] = "#endif\n";
|
|
+ my @args = map {$_->{type} eq 'encoded string'
|
|
+ ? ($_->{name}, "$_->{name}_size", '-1')
|
|
+ : $_->{name}} @d;
|
|
+ $accum->{suffix}[1] .=
|
|
+ (join '',
|
|
+ "#define $name",
|
|
+ '(', join(', ', @parms), ')',
|
|
+ "\\\n ",
|
|
+ $name, '_wide',
|
|
+ '(', join(', ', @args), ')',
|
|
+ "\n");
|
|
+ @args = map {$_->{type} eq 'encoded string'
|
|
+ ? ("aspell_cast_from_wide_($_->{name})",
|
|
+ "$_->{name}_size*aspell_cast_(int,sizeof(*($_->{name})))",
|
|
+ "sizeof(*($_->{name}))")
|
|
+ : $_->{name}} @d;
|
|
+ return (join '',
|
|
+ "\n",
|
|
+ "/* version of $name that is safe to use with (null terminated) wide characters */\n",
|
|
+ '#define ',
|
|
+ $name, '_w',
|
|
+ '(', join(', ', @parms), ')',
|
|
+ "\\\n ",
|
|
+ $name, '_wide',
|
|
+ '(', join(', ', @args), ')',
|
|
+ "\n");
|
|
+}
|
|
+
|
|
+
|
|
=item call_func NAME @TYPES PARMS ; %ACCUM
|
|
|
|
Return a string to call a func. Will prefix the function with return
|
|
@@ -103,7 +166,6 @@ Parms can be any of:
|
|
|
|
sub call_func ( $ \@ $ ; \% ) {
|
|
my ($name, $d, $p, $accum) = @_;
|
|
- $accum = {} unless defined $accum;
|
|
my @d = @$d;
|
|
my $func_ret = to_type_name(shift @d, {%$p,pos=>'return'}, %$accum);
|
|
return (join '',
|
|
@@ -148,8 +210,14 @@ sub to_type_name ( $ $ ; \% ) {
|
|
my $name = $t->{name};
|
|
my $type = $t->{type};
|
|
|
|
- return ( (to_type_name {%$d, type=>'string'}, $p, %$accum) ,
|
|
- (to_type_name {%$d, type=>'int', name=>"$d->{name}_size"}, $p, %$accum) )
|
|
+ if ($name eq 'encoded string' && $is_cc && $pos eq 'parm') {
|
|
+ my @types = ((to_type_name {%$d, type=>($p->{wide}?'const void pointer':'string')}, $p, %$accum),
|
|
+ (to_type_name {%$d, type=>'int', name=>"$d->{name}_size"}, $p, %$accum));
|
|
+ push @types, (to_type_name {%$d, type=>'int', name=>"$d->{name}_type_width"}, $p, %$accum) if $p->{wide};
|
|
+ return @types;
|
|
+ }
|
|
+ return ( (to_type_name {%$d, type=>($p->{wide}?'const void pointer':'string')}, $p, %$accum) ,
|
|
+ (to_type_name {%$d, type=>'int', name=>"$d->{name}_size"}, $p, %$accum) )
|
|
if $name eq 'encoded string' && $is_cc && $pos eq 'parm';
|
|
|
|
my $str;
|
|
@@ -174,7 +242,7 @@ sub to_type_name ( $ $ ; \% ) {
|
|
$str .= "String";
|
|
}
|
|
} elsif ($name eq 'encoded string') {
|
|
- $str .= "const char *";
|
|
+ $str .= $p->{wide} ? "const void *" : "const char *";
|
|
} elsif ($name eq '') {
|
|
$str .= "void";
|
|
} elsif ($name eq 'bool' && $is_cc) {
|
|
@@ -186,7 +254,7 @@ sub to_type_name ( $ $ ; \% ) {
|
|
if ($t->{pointer}) {
|
|
$accum->{types}->{$name} = $t;
|
|
} else {
|
|
- $accum->{headers}->{$t->{created_in}} = true;
|
|
+ $accum->{headers}->{$t->{created_in}} = true unless $mode eq 'cc';
|
|
}
|
|
$str .= "$c_type Aspell" if $mode eq 'cc';
|
|
$str .= to_mixed($name);
|
|
@@ -214,6 +282,7 @@ sub to_type_name ( $ $ ; \% ) {
|
|
return $str;
|
|
}
|
|
|
|
+
|
|
=item make_desc DESC ; LEVEL
|
|
|
|
Make a C comment out of DESC optionally indenting it LEVEL spaces.
|
|
@@ -286,6 +355,7 @@ sub form_c_method ($ $ $ ; \% )
|
|
} else {
|
|
$func = "aspell $class $name";
|
|
}
|
|
+ $func .= " wide" if $p->{wide};
|
|
if (exists $d->{'const'}) {
|
|
splice @data, 1, 0, {type => "const $class", name=> $this_name};
|
|
} else {
|
|
@@ -306,6 +376,21 @@ sub make_c_method ($ $ $ ; \%)
|
|
return &make_func(@ret);
|
|
}
|
|
|
|
+sub get_c_func_name ($ $ $)
|
|
+{
|
|
+ my @ret = &form_c_method(@_);
|
|
+ return undef unless @ret > 0;
|
|
+ return to_lower $ret[0];
|
|
+}
|
|
+
|
|
+sub make_wide_macro ($ $ $ ; \%)
|
|
+{
|
|
+ my @ret = &form_c_method(@_);
|
|
+ return undef unless @ret > 0;
|
|
+ my $str = &make_wide_version(@ret);
|
|
+ return $str;
|
|
+}
|
|
+
|
|
sub call_c_method ($ $ $ ; \%)
|
|
{
|
|
my @ret = &form_c_method(@_);
|
|
diff --git a/auto/MkSrc/Create.pm b/auto/MkSrc/Create.pm
|
|
index e467a10..d76af15 100644
|
|
--- a/auto/MkSrc/Create.pm
|
|
+++ b/auto/MkSrc/Create.pm
|
|
@@ -75,8 +75,10 @@ sub create_cc_file ( % ) {
|
|
$file .= "#include \"aspell.h\"\n" if $p{type} eq 'cxx';
|
|
$file .= "#include \"settings.h\"\n" if $p{type} eq 'native_impl' && $p{name} eq 'errors';
|
|
$file .= "#include \"gettext.h\"\n" if $p{type} eq 'native_impl' && $p{name} eq 'errors';
|
|
+ $file .= cmap {"#include <$_>\n"} sort keys %{$accum{sys_headers}};
|
|
$file .= cmap {"#include \"".to_lower($_).".hpp\"\n"} sort keys %{$accum{headers}};
|
|
- $file .= "#ifdef __cplusplus\nextern \"C\" {\n#endif\n" if $p{header} && !$p{cxx};
|
|
+ $file .= "\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n" if $p{header} && !$p{cxx};
|
|
+ $file .= join('', grep {defined $_} @{$accum{prefix}});
|
|
$file .= "\nnamespace $p{namespace} {\n\n" if $p{cxx};
|
|
if (defined $info{forward}{proc}{$p{type}}) {
|
|
my @types = sort {$a->{name} cmp $b->{name}} (values %{$accum{types}});
|
|
@@ -84,6 +86,7 @@ sub create_cc_file ( % ) {
|
|
}
|
|
$file .= "\n";
|
|
$file .= $body;
|
|
+ $file .= join('', grep {defined $_} @{$accum{suffix}});
|
|
$file .= "\n\n}\n\n" if $p{cxx};
|
|
$file .= "#ifdef __cplusplus\n}\n#endif\n" if $p{header} && !$p{cxx};
|
|
$file .= "#endif /* $hm */\n" if $p{header};
|
|
diff --git a/auto/MkSrc/Info.pm b/auto/MkSrc/Info.pm
|
|
index 5d30ec5..a131b2e 100644
|
|
--- a/auto/MkSrc/Info.pm
|
|
+++ b/auto/MkSrc/Info.pm
|
|
@@ -60,6 +60,7 @@ each proc sub should take the following argv
|
|
the object from which it is a member of
|
|
no native: do not attemt to create a native implementation
|
|
treat as object: treat as a object rather than a pointer
|
|
+ no conv: do not convert an encoded string
|
|
|
|
The %info structure is initialized as follows:
|
|
|
|
@@ -104,8 +105,8 @@ The %info structure is initialized as follows:
|
|
errors => {}, # possible errors
|
|
method => {
|
|
# A class method
|
|
- options => ['desc', 'posib err', 'c func', 'const',
|
|
- 'c only', 'c impl', 'cxx impl'],
|
|
+ options => ['desc', 'posib err', 'c func', 'const', 'no conv', 'on conv error',
|
|
+ 'c only', 'c impl', 'cxx impl', 'cc extra'],
|
|
groups => undef},
|
|
constructor => {
|
|
# A class constructor
|
|
diff --git a/auto/MkSrc/ProcCc.pm b/auto/MkSrc/ProcCc.pm
|
|
index 47c4338..98cc435 100644
|
|
--- a/auto/MkSrc/ProcCc.pm
|
|
+++ b/auto/MkSrc/ProcCc.pm
|
|
@@ -23,7 +23,7 @@ use MkSrc::Info;
|
|
sub make_c_object ( $ @ );
|
|
|
|
$info{group}{proc}{cc} = sub {
|
|
- my ($data) = @_;
|
|
+ my ($data,@rest) = @_;
|
|
my $ret;
|
|
my $stars = (70 - length $data->{name})/2;
|
|
$ret .= "/";
|
|
@@ -33,14 +33,14 @@ $info{group}{proc}{cc} = sub {
|
|
$ret .= "/\n";
|
|
foreach my $d (@{$data->{data}}) {
|
|
$ret .= "\n\n";
|
|
- $ret .= $info{$d->{type}}{proc}{cc}->($d);
|
|
+ $ret .= $info{$d->{type}}{proc}{cc}->($d,@rest);
|
|
}
|
|
$ret .= "\n\n";
|
|
return $ret;
|
|
};
|
|
|
|
$info{enum}{proc}{cc} = sub {
|
|
- my ($d) = @_;
|
|
+ my ($d,@rest) = @_;
|
|
my $n = "Aspell".to_mixed($d->{name});
|
|
return ("\n".
|
|
make_desc($d->{desc}).
|
|
@@ -58,21 +58,26 @@ $info{struct}{proc}{cc} = sub {
|
|
};
|
|
|
|
$info{union}{proc}{cc} = sub {
|
|
- return make_c_object "union", $_[0];
|
|
+ return make_c_object "union", @_;
|
|
};
|
|
|
|
$info{class}{proc}{cc} = sub {
|
|
- my ($d) = @_;
|
|
+ my ($d,$accum) = @_;
|
|
my $class = $d->{name};
|
|
my $classname = "Aspell".to_mixed($class);
|
|
my $ret = "";
|
|
$ret .= "typedef struct $classname $classname;\n\n";
|
|
foreach (@{$d->{data}}) {
|
|
- my $s = make_c_method($class, $_, {mode=>'cc'});
|
|
+ my $s = make_c_method($class, $_, {mode=>'cc'}, %$accum);
|
|
next unless defined $s;
|
|
$ret .= "\n";
|
|
$ret .= make_desc($_->{desc});
|
|
- $ret .= make_c_method($class, $_, {mode=>'cc'}).";\n";
|
|
+ $ret .= make_c_method($class, $_, {mode=>'cc'}, %$accum).";\n";
|
|
+ if (grep {$_->{type} eq 'encoded string'} @{$_->{data}}) {
|
|
+ $ret .= make_c_method($class, $_, {mode=>'cc', wide=>true}, %$accum).";\n";
|
|
+ $ret .= make_wide_macro($class, $_, {mode=>'cc'}, %$accum);
|
|
+ }
|
|
+ $ret .= "\n".$_->{'cc extra'}."\n" if defined $_->{'cc extra'};
|
|
}
|
|
$ret .= "\n";
|
|
return $ret;
|
|
@@ -105,7 +110,8 @@ $info{errors}{proc}{cc} = sub {
|
|
};
|
|
|
|
sub make_c_object ( $ @ ) {
|
|
- my ($t, $d) = @_;
|
|
+ my ($t, $d, $accum) = @_;
|
|
+ $accum = {} unless defined $accum;
|
|
my $struct;
|
|
$struct .= "Aspell";
|
|
$struct .= to_mixed($d->{name});
|
|
@@ -120,7 +126,7 @@ sub make_c_object ( $ @ ) {
|
|
"\n};\n"),
|
|
"typedef $t $struct $struct;",
|
|
join ("\n",
|
|
- map {make_c_method($d->{name}, $_, {mode=>'cc'}).";"}
|
|
+ map {make_c_method($d->{name}, $_, {mode=>'cc'}, %$accum).";"}
|
|
grep {$_->{type} eq 'method'}
|
|
@{$d->{data}})
|
|
)."\n";
|
|
diff --git a/auto/MkSrc/ProcImpl.pm b/auto/MkSrc/ProcImpl.pm
|
|
index b8628fd..3d0f220 100644
|
|
--- a/auto/MkSrc/ProcImpl.pm
|
|
+++ b/auto/MkSrc/ProcImpl.pm
|
|
@@ -45,10 +45,13 @@ $info{class}{proc}{impl} = sub {
|
|
foreach (grep {$_ ne ''} split /\s*,\s*/, $data->{'c impl headers'}) {
|
|
$accum->{headers}{$_} = true;
|
|
}
|
|
- foreach my $d (@{$data->{data}}) {
|
|
+ my @d = @{$data->{data}};
|
|
+ while (@d) {
|
|
+ my $d = shift @d;
|
|
+ my $need_wide = false;
|
|
next unless one_of $d->{type}, qw(method constructor destructor);
|
|
my @parms = @{$d->{data}} if exists $d->{data};
|
|
- my $m = make_c_method $data->{name}, $d, {mode=>'cc_cxx', use_name=>true}, %$accum;
|
|
+ my $m = make_c_method $data->{name}, $d, {mode=>'cc_cxx', use_name=>true, wide=>$d->{wide}}, %$accum;
|
|
next unless defined $m;
|
|
$ret .= "extern \"C\" $m\n";
|
|
$ret .= "{\n";
|
|
@@ -57,24 +60,49 @@ $info{class}{proc}{impl} = sub {
|
|
} else {
|
|
if ($d->{type} eq 'method') {
|
|
my $ret_type = shift @parms;
|
|
- my $ret_native = to_type_name $ret_type, {mode=>'native_no_err', pos=>'return'}, %$accum;
|
|
+ my $ret_native = to_type_name $ret_type, {mode=>'native_no_err', pos=>'return', wide=>$d->{wide}}, %$accum;
|
|
my $snum = 0;
|
|
+ my $call_fun = $d->{name};
|
|
+ my @call_parms;
|
|
foreach (@parms) {
|
|
my $n = to_lower($_->{name});
|
|
- if ($_->{type} eq 'encoded string') {
|
|
- $accum->{headers}{'mutable string'} = true;
|
|
- $accum->{headers}{'convert'} = true;
|
|
- $ret .= " ths->temp_str_$snum.clear();\n";
|
|
- $ret .= " ths->to_internal_->convert($n, ${n}_size, ths->temp_str_$snum);\n";
|
|
- $ret .= " unsigned int s$snum = ths->temp_str_$snum.size();\n";
|
|
- $_ = "MutableString(ths->temp_str_$snum.mstr(), s$snum)";
|
|
- $snum++;
|
|
+ if ($_->{type} eq 'encoded string' && !exists($d->{'no conv'})) {
|
|
+ $need_wide = true unless $d->{wide};
|
|
+ die unless exists $d->{'posib err'};
|
|
+ $accum->{headers}{'mutable string'} = true;
|
|
+ $accum->{headers}{'convert'} = true;
|
|
+ my $name = get_c_func_name $data->{name}, $d, {mode=>'cc_cxx', use_name=>true, wide=>$d->{wide}};
|
|
+ $ret .= " ths->temp_str_$snum.clear();\n";
|
|
+ if ($d->{wide}) {
|
|
+ $ret .= " ${n}_size = get_correct_size(\"$name\", ths->to_internal_->in_type_width(), ${n}_size, ${n}_type_width);\n";
|
|
+ } else {
|
|
+ $ret .= " PosibErr<int> ${n}_fixed_size = get_correct_size(\"$name\", ths->to_internal_->in_type_width(), ${n}_size);\n";
|
|
+ if (exists($d->{'on conv error'})) {
|
|
+ $ret .= " if (${n}_fixed_size.get_err()) {\n";
|
|
+ $ret .= " ".$d->{'on conv error'}."\n";
|
|
+ $ret .= " } else {\n";
|
|
+ $ret .= " ${n}_size = ${n}_fixed_size;\n";
|
|
+ $ret .= " }\n";
|
|
+ } else {
|
|
+ $ret .= " ths->err_.reset(${n}_fixed_size.release_err());\n";
|
|
+ $ret .= " if (ths->err_ != 0) return ".(c_error_cond $ret_type).";\n";
|
|
+ }
|
|
+ }
|
|
+ $ret .= " ths->to_internal_->convert($n, ${n}_size, ths->temp_str_$snum);\n";
|
|
+ $ret .= " unsigned int s$snum = ths->temp_str_$snum.size();\n";
|
|
+ push @call_parms, "MutableString(ths->temp_str_$snum.mstr(), s$snum)";
|
|
+ $snum++;
|
|
+ } elsif ($_->{type} eq 'encoded string') {
|
|
+ $need_wide = true unless $d->{wide};
|
|
+ push @call_parms, $n, "${n}_size";
|
|
+ push @call_parms, "${n}_type_width" if $d->{wide};
|
|
+ $call_fun .= " wide" if $d->{wide};
|
|
} else {
|
|
- $_ = $n;
|
|
+ push @call_parms, $n;
|
|
}
|
|
}
|
|
- my $parms = '('.(join ', ', @parms).')';
|
|
- my $exp = "ths->".to_lower($d->{name})."$parms";
|
|
+ my $parms = '('.(join ', ', @call_parms).')';
|
|
+ my $exp = "ths->".to_lower($call_fun)."$parms";
|
|
if (exists $d->{'posib err'}) {
|
|
$accum->{headers}{'posib err'} = true;
|
|
$ret .= " PosibErr<$ret_native> ret = $exp;\n";
|
|
@@ -118,6 +146,7 @@ $info{class}{proc}{impl} = sub {
|
|
}
|
|
}
|
|
$ret .= "}\n\n";
|
|
+ unshift @d,{%$d, wide=>true} if $need_wide;
|
|
}
|
|
return $ret;
|
|
};
|
|
diff --git a/auto/MkSrc/Read.pm b/auto/MkSrc/Read.pm
|
|
index 4b3d1d0..4bf640e 100644
|
|
--- a/auto/MkSrc/Read.pm
|
|
+++ b/auto/MkSrc/Read.pm
|
|
@@ -88,13 +88,13 @@ sub advance ( ) {
|
|
$in_pod = $1 if $line =~ /^\=(\w+)/;
|
|
$line = '' if $in_pod;
|
|
$in_pod = undef if $in_pod && $in_pod eq 'cut';
|
|
- $line =~ s/\#.*$//;
|
|
+ $line =~ s/(?<!\\)\#.*$//;
|
|
$line =~ s/^(\t*)//;
|
|
$level = $base_level + length($1);
|
|
$line =~ s/\s*$//;
|
|
++$base_level if $line =~ s/^\{$//;
|
|
--$base_level if $line =~ s/^\}$//;
|
|
- $line =~ s/\\([{}])/$1/g;
|
|
+ $line =~ s/\\([{}#\\])/$1/g;
|
|
} while ($line eq '');
|
|
#print "$level:$line\n";
|
|
}
|
|
diff --git a/auto/mk-src.in b/auto/mk-src.in
|
|
index c514db3..cdc1f8c 100644
|
|
--- a/auto/mk-src.in
|
|
+++ b/auto/mk-src.in
|
|
@@ -599,6 +599,7 @@ errors:
|
|
invalid expression
|
|
mesg => "%expression" is not a valid regular expression.
|
|
parms => expression
|
|
+
|
|
}
|
|
group: speller
|
|
{
|
|
@@ -641,6 +642,7 @@ class: speller
|
|
posib err
|
|
desc => Returns 0 if it is not in the dictionary,
|
|
1 if it is, or -1 on error.
|
|
+ on conv error => return 0;
|
|
/
|
|
bool
|
|
encoded string: word
|
|
@@ -706,6 +708,8 @@ class: speller
|
|
desc => Return NULL on error.
|
|
The word list returned by suggest is only
|
|
valid until the next call to suggest.
|
|
+ on conv error =>
|
|
+ word = NULL; word_size = 0;
|
|
/
|
|
const word list
|
|
encoded string: word
|
|
@@ -831,7 +835,6 @@ class: document checker
|
|
void
|
|
|
|
method: process
|
|
-
|
|
desc => Process a string.
|
|
The string passed in should only be split on
|
|
white space characters. Furthermore, between
|
|
@@ -840,10 +843,10 @@ class: document checker
|
|
in the document. Passing in strings out of
|
|
order, skipping strings or passing them in
|
|
more than once may lead to undefined results.
|
|
+ no conv
|
|
/
|
|
void
|
|
- string: str
|
|
- int: size
|
|
+ encoded string: str
|
|
|
|
method: next misspelling
|
|
|
|
@@ -851,9 +854,23 @@ class: document checker
|
|
processed string. If there are no more
|
|
misspelled words, then token.word will be
|
|
NULL and token.size will be 0
|
|
+ cc extra =>
|
|
+ \#define aspell_document_checker_next_misspelling_w(type, ths) \\
|
|
+ aspell_document_checker_next_misspelling_adj(ths, sizeof(type))
|
|
/
|
|
token object
|
|
|
|
+ method: next misspelling adj
|
|
+ desc => internal: do not use
|
|
+ c impl =>
|
|
+ Token res = ths->next_misspelling();
|
|
+ res.offset /= type_width;
|
|
+ res.len /= type_width;
|
|
+ return res;
|
|
+ /
|
|
+ token object
|
|
+ int: type_width
|
|
+
|
|
method: filter
|
|
|
|
desc => Returns the underlying filter class.
|
|
@@ -913,9 +930,30 @@ class: string enumeration
|
|
ths->from_internal_->append_null(ths->temp_str);
|
|
return ths->temp_str.data();
|
|
\}
|
|
+ cc extra =>
|
|
+ \#define aspell_string_enumeration_next_w(type, ths) \\
|
|
+ aspell_cast_(const type *, aspell_string_enumeration_next_wide(ths, sizeof(type)))
|
|
/
|
|
const string
|
|
|
|
+ method: next wide
|
|
+ c impl =>
|
|
+ const char * s = ths->next();
|
|
+ if (s == 0) {
|
|
+ return s;
|
|
+ } else if (ths->from_internal_ == 0) \{
|
|
+ assert(type_width == 1);
|
|
+ return s;
|
|
+ \} else \{
|
|
+ assert(type_width == ths->from_internal_->out_type_width());
|
|
+ ths->temp_str.clear();
|
|
+ ths->from_internal_->convert(s,-1,ths->temp_str);
|
|
+ ths->from_internal_->append_null(ths->temp_str);
|
|
+ return ths->temp_str.data();
|
|
+ \}
|
|
+ /
|
|
+ const void pointer
|
|
+ int: type_width
|
|
}
|
|
group: info
|
|
{
|
|
diff --git a/common/convert.cpp b/common/convert.cpp
|
|
index 7fd3895..77804c0 100644
|
|
--- a/common/convert.cpp
|
|
+++ b/common/convert.cpp
|
|
@@ -511,18 +511,25 @@ namespace acommon {
|
|
// Trivial Conversion
|
|
//
|
|
|
|
+ const char * unsupported_null_term_wide_string_msg =
|
|
+ "Null-terminated wide-character strings unsupported when used this way.";
|
|
+
|
|
template <typename Chr>
|
|
struct DecodeDirect : public Decode
|
|
{
|
|
+ DecodeDirect() {type_width = sizeof(Chr);}
|
|
void decode(const char * in0, int size, FilterCharVector & out) const {
|
|
const Chr * in = reinterpret_cast<const Chr *>(in0);
|
|
- if (size == -1) {
|
|
+ if (size == -sizeof(Chr)) {
|
|
for (;*in; ++in)
|
|
- out.append(*in);
|
|
+ out.append(*in, sizeof(Chr));
|
|
+ } else if (size <= -1) {
|
|
+ fprintf(stderr, "%s\n", unsupported_null_term_wide_string_msg);
|
|
+ abort();
|
|
} else {
|
|
- const Chr * stop = reinterpret_cast<const Chr *>(in0 +size);
|
|
+ const Chr * stop = reinterpret_cast<const Chr *>(in0) + size/sizeof(Chr);
|
|
for (;in != stop; ++in)
|
|
- out.append(*in);
|
|
+ out.append(*in, sizeof(Chr));
|
|
}
|
|
}
|
|
PosibErr<void> decode_ec(const char * in0, int size,
|
|
@@ -535,6 +542,7 @@ namespace acommon {
|
|
template <typename Chr>
|
|
struct EncodeDirect : public Encode
|
|
{
|
|
+ EncodeDirect() {type_width = sizeof(Chr);}
|
|
void encode(const FilterChar * in, const FilterChar * stop,
|
|
CharVector & out) const {
|
|
for (; in != stop; ++in) {
|
|
@@ -564,11 +572,15 @@ namespace acommon {
|
|
template <typename Chr>
|
|
struct ConvDirect : public DirectConv
|
|
{
|
|
+ ConvDirect() {type_width = sizeof(Chr);}
|
|
void convert(const char * in0, int size, CharVector & out) const {
|
|
- if (size == -1) {
|
|
+ if (size == -sizeof(Chr)) {
|
|
const Chr * in = reinterpret_cast<const Chr *>(in0);
|
|
for (;*in != 0; ++in)
|
|
out.append(in, sizeof(Chr));
|
|
+ } else if (size <= -1) {
|
|
+ fprintf(stderr, "%s\n", unsupported_null_term_wide_string_msg);
|
|
+ abort();
|
|
} else {
|
|
out.append(in0, size);
|
|
}
|
|
@@ -1092,5 +1104,20 @@ namespace acommon {
|
|
}
|
|
return 0;
|
|
}
|
|
-
|
|
+
|
|
+ PosibErr<void> unsupported_null_term_wide_string_err_(const char * func) {
|
|
+ static bool reported_to_stderr = false;
|
|
+ PosibErr<void> err = make_err(other_error, unsupported_null_term_wide_string_msg);
|
|
+ if (!reported_to_stderr) {
|
|
+ CERR.printf("ERROR: %s: %s\n", func, unsupported_null_term_wide_string_msg);
|
|
+ reported_to_stderr = true;
|
|
+ }
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ void unsupported_null_term_wide_string_abort_(const char * func) { // print "<func>: <msg>" to CERR, then abort(); does not return
|
|
+ CERR.printf("%s: %s\n", func, unsupported_null_term_wide_string_msg); // both %s conversions now have a matching argument
|
|
+ abort();
|
|
+ }
|
|
+
|
|
}
|
|
diff --git a/common/convert.hpp b/common/convert.hpp
|
|
index 76332ee..c948973 100644
|
|
--- a/common/convert.hpp
|
|
+++ b/common/convert.hpp
|
|
@@ -7,6 +7,8 @@
|
|
#ifndef ASPELL_CONVERT__HPP
|
|
#define ASPELL_CONVERT__HPP
|
|
|
|
+#include "settings.h"
|
|
+
|
|
#include "string.hpp"
|
|
#include "posib_err.hpp"
|
|
#include "char_vector.hpp"
|
|
@@ -25,8 +27,9 @@ namespace acommon {
|
|
typedef const Config CacheConfig;
|
|
typedef const char * CacheKey;
|
|
String key;
|
|
+ int type_width; // type width in bytes
|
|
bool cache_key_eq(const char * l) const {return key == l;}
|
|
- ConvBase() {}
|
|
+ ConvBase() : type_width(1) {}
|
|
private:
|
|
ConvBase(const ConvBase &);
|
|
void operator=(const ConvBase &);
|
|
@@ -56,6 +59,8 @@ namespace acommon {
|
|
virtual ~Encode() {}
|
|
};
|
|
struct DirectConv { // convert directly from in_code to out_code.
|
|
+ int type_width; // type width in bytes
|
|
+ DirectConv() : type_width(1) {}
|
|
// should not take ownership of decode and encode.
|
|
// decode and encode guaranteed to stick around for the life
|
|
// of the object.
|
|
@@ -126,6 +131,9 @@ namespace acommon {
|
|
const char * in_code() const {return decode_->key.c_str();}
|
|
const char * out_code() const {return encode_->key.c_str();}
|
|
|
|
+ int in_type_width() const {return decode_->type_width;}
|
|
+ int out_type_width() const {return encode_->type_width;}
|
|
+
|
|
void append_null(CharVector & out) const
|
|
{
|
|
const char nul[4] = {0,0,0,0}; // 4 should be enough
|
|
@@ -191,6 +199,10 @@ namespace acommon {
|
|
}
|
|
}
|
|
|
|
+ void convert(const void * in, int size, CharVector & out) {
|
|
+ convert(static_cast<const char *>(in), size, out);
|
|
+ }
|
|
+
|
|
void generic_convert(const char * in, int size, CharVector & out);
|
|
|
|
};
|
|
@@ -412,6 +424,30 @@ namespace acommon {
|
|
return operator()(str, str + byte_size);}
|
|
};
|
|
|
|
+#ifdef SLOPPY_NULL_TERM_STRINGS
|
|
+ static const bool sloppy_null_term_strings = true;
|
|
+#else
|
|
+ static const bool sloppy_null_term_strings = false;
|
|
+#endif
|
|
+
|
|
+ PosibErr<void> unsupported_null_term_wide_string_err_(const char * func);
|
|
+ void unsupported_null_term_wide_string_abort_(const char * func);
|
|
+
|
|
+ static inline PosibErr<int> get_correct_size(const char * func, int conv_type_width, int size) {
|
|
+ if (sloppy_null_term_strings && size <= -1)
|
|
+ return -conv_type_width;
|
|
+ if (size <= -1 && -conv_type_width != size)
|
|
+ return unsupported_null_term_wide_string_err_(func);
|
|
+ return size;
|
|
+ }
|
|
+ static inline int get_correct_size(const char * func, int conv_type_width, int size, int type_width) {
|
|
+ if ((sloppy_null_term_strings || type_width <= -1) && size <= -1)
|
|
+ return -conv_type_width;
|
|
+ if (size <= -1 && conv_type_width != type_width)
|
|
+ unsupported_null_term_wide_string_abort_(func);
|
|
+ return size;
|
|
+ }
|
|
+
|
|
}
|
|
|
|
#endif
|
|
diff --git a/common/document_checker.cpp b/common/document_checker.cpp
|
|
index 5e510c4..0ccf1cd 100644
|
|
--- a/common/document_checker.cpp
|
|
+++ b/common/document_checker.cpp
|
|
@@ -44,7 +44,9 @@ namespace acommon {
|
|
void DocumentChecker::process(const char * str, int size)
|
|
{
|
|
proc_str_.clear();
|
|
- conv_->decode(str, size, proc_str_);
|
|
+ PosibErr<int> fixed_size = get_correct_size("aspell_document_checker_process", conv_->in_type_width(), size);
|
|
+ if (!fixed_size.has_err())
|
|
+ conv_->decode(str, fixed_size, proc_str_);
|
|
proc_str_.append(0);
|
|
FilterChar * begin = proc_str_.pbegin();
|
|
FilterChar * end = proc_str_.pend() - 1;
|
|
@@ -53,6 +55,19 @@ namespace acommon {
|
|
tokenizer_->reset(begin, end);
|
|
}
|
|
|
|
+ void DocumentChecker::process_wide(const void * str, int size, int type_width)
|
|
+ {
|
|
+ proc_str_.clear();
|
|
+ int fixed_size = get_correct_size("aspell_document_checker_process", conv_->in_type_width(), size, type_width);
|
|
+ conv_->decode(static_cast<const char *>(str), fixed_size, proc_str_);
|
|
+ proc_str_.append(0);
|
|
+ FilterChar * begin = proc_str_.pbegin();
|
|
+ FilterChar * end = proc_str_.pend() - 1;
|
|
+ if (filter_)
|
|
+ filter_->process(begin, end);
|
|
+ tokenizer_->reset(begin, end);
|
|
+ }
|
|
+
|
|
Token DocumentChecker::next_misspelling()
|
|
{
|
|
bool correct;
|
|
diff --git a/common/document_checker.hpp b/common/document_checker.hpp
|
|
index d35bb88..11a3c73 100644
|
|
--- a/common/document_checker.hpp
|
|
+++ b/common/document_checker.hpp
|
|
@@ -36,6 +36,7 @@ namespace acommon {
|
|
PosibErr<void> setup(Tokenizer *, Speller *, Filter *);
|
|
void reset();
|
|
void process(const char * str, int size);
|
|
+ void process_wide(const void * str, int size, int type_width);
|
|
Token next_misspelling();
|
|
|
|
Filter * filter() {return filter_;}
|
|
diff --git a/configure.ac b/configure.ac
|
|
index ed26945..51a31cb 100644
|
|
--- a/configure.ac
|
|
+++ b/configure.ac
|
|
@@ -70,6 +70,9 @@ AC_ARG_ENABLE(compile-in-filters,
|
|
AC_ARG_ENABLE(filter-version-control,
|
|
[ --disable-filter-version-control])
|
|
|
|
+AC_ARG_ENABLE(sloppy-null-term-strings,
|
|
+ AS_HELP_STRING([--enable-sloppy-null-term-strings],[allow null terminated UCS-2 and UCS-4 strings]))
|
|
+
|
|
AC_ARG_ENABLE(pspell-compatibility,
|
|
AS_HELP_STRING([--disable-pspell-compatibility],[don't install pspell compatibility libraries]))
|
|
|
|
@@ -133,6 +136,11 @@ fi
|
|
AM_CONDITIONAL(COMPILE_IN_FILTERS,
|
|
[test "$enable_compile_in_filters" = "yes"])
|
|
|
|
+if test "$enable_sloppy_null_term_strings" = "yes"
|
|
+then
|
|
+ AC_DEFINE(SLOPPY_NULL_TERM_STRINGS, 1, [Defined if null-terminated UCS-2 and UCS-4 strings should always be allowed.])
|
|
+fi
|
|
+
|
|
AM_CONDITIONAL(PSPELL_COMPATIBILITY,
|
|
[test "$enable_pspell_compatibility" != "no"])
|
|
AM_CONDITIONAL(INCREMENTED_SONAME,
|
|
diff --git a/manual/aspell.texi b/manual/aspell.texi
|
|
index 080f894..f331fb0 100644
|
|
--- a/manual/aspell.texi
|
|
+++ b/manual/aspell.texi
|
|
@@ -158,7 +158,8 @@ Installing
|
|
|
|
* Generic Install Instructions::
|
|
* HTML Manuals and "make clean"::
|
|
-* Curses Notes::
|
|
+* Curses Notes::
|
|
+* Upgrading from Aspell 0.60.7::
|
|
* Loadable Filter Notes::
|
|
* Upgrading from Aspell 0.50::
|
|
* Upgrading from Aspell .33/Pspell .12::
|
|
@@ -2196,18 +2197,26 @@ int correct = aspell_speller_check(spell_checker, @var{word}, @var{size});
|
|
@end smallexample
|
|
|
|
@noindent
|
|
-@var{word} is expected to be a @code{const char *} character
|
|
-string. If the encoding is set to be @code{ucs-2} or
|
|
-@code{ucs-4} @var{word} is expected to be a cast
|
|
-from either @code{const u16int *} or @code{const u32int *}
|
|
-respectively. @code{u16int} and @code{u32int} are generally
|
|
-@code{unsigned short} and @code{unsigned int} respectively.
|
|
-@var{size} is the length of the string or @code{-1} if the string
|
|
-is null terminated. If the string is a cast from @code{const u16int
|
|
-*} or @code{const u32int *} then @code{@i{size}} is the amount of
|
|
-space in bytes the string takes up after being cast to @code{const
|
|
-char *} and not the true size of the string. @code{sspell_speller_check}
|
|
-will return @code{0} if it is not found and non-zero otherwise.
|
|
+@var{word} is expected to be a @code{const char *} character string.
|
|
+@var{size} is the length of the string or @code{-1} if the string is
|
|
+null terminated. @code{aspell_speller_check} will return @code{0} if it is not found
|
|
+and non-zero otherwise.
|
|
+
|
|
+If you are using the @code{ucs-2} or @code{ucs-4} encoding then the
|
|
+string is expected to be either a 2 or 4 byte wide integer
|
|
+(respectively) and the @code{_w} macro version should be used:
|
|
+
|
|
+@smallexample
|
|
+int correct = aspell_speller_check_w(spell_checker, @var{word}, @var{size});
|
|
+@end smallexample
|
|
+
|
|
+The macro will cast the string to the correct type and convert
|
|
+@var{size} into bytes for you and then call the special wide version of the
|
|
+function that will make sure the encoding is correct for the type
|
|
+passed in. For compatibility with older versions of Aspell the normal
|
|
+non-wide functions can still be used provided that the size of the
|
|
+string, in bytes, is also passed in. Null terminated @code{ucs-2} or
|
|
+@code{ucs-4} strings are no longer supported when using the non-wide functions.
|
|
|
|
If the word is not correct, then the @code{suggest} method can be used
|
|
to come up with likely replacements.
|
|
@@ -2226,7 +2235,28 @@ delete_aspell_string_enumeration(elements);
|
|
|
|
Notice how @code{elements} is deleted but @code{suggestions} is not.
|
|
The value returned by @code{suggestions} is only valid to the next
|
|
-call to @code{suggest}. Once a replacement is made the
|
|
+call to @code{suggest}.
|
|
+
|
|
+If you are using the @code{ucs-2} or @code{ucs-4} encoding then, in
|
|
+addition to using the @code{_w} macro for the @code{suggest} method, you
|
|
+should also use the @code{_w} macro with the @code{next} method which
|
|
+will cast the string to the correct type for you. For example, if you
|
|
+are using the @code{ucs-2} encoding and the string is a @code{const
|
|
+uint16_t *} then you should use:
|
|
+
|
|
+@smallexample
|
|
+AspellWordList * suggestions = aspell_speller_suggest_w(spell_checker,
|
|
+ @var{word}, @var{size});
|
|
+AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);
|
|
+const uint16_t * word;
|
|
+while ( (word = aspell_string_enumeration_next_w(uint16_t, elements)) != NULL )
|
|
+@{
|
|
+ // add to suggestion list
|
|
+@}
|
|
+delete_aspell_string_enumeration(elements);
|
|
+@end smallexample
|
|
+
|
|
+Once a replacement is made the
|
|
@code{store_repl} method should be used to communicate the replacement
|
|
pair back to the spell checker (for the reason, @pxref{Notes on
|
|
Storing Replacement Pairs}). Its usage is as follows:
|
|
diff --git a/manual/readme.texi b/manual/readme.texi
|
|
index 4bfb2e2..ae680da 100644
|
|
--- a/manual/readme.texi
|
|
+++ b/manual/readme.texi
|
|
@@ -15,14 +15,16 @@ The latest version can always be found at GNU Aspell's home page at
|
|
@uref{http://aspell.net}.
|
|
|
|
@menu
|
|
-* Generic Install Instructions::
|
|
-* HTML Manuals and "make clean"::
|
|
-* Curses Notes::
|
|
-* Loadable Filter Notes::
|
|
-* Upgrading from Aspell 0.50::
|
|
-* Upgrading from Aspell .33/Pspell .12::
|
|
-* Upgrading from a Pre-0.50 snapshot::
|
|
-* WIN32 Notes::
|
|
+* Generic Install Instructions::
|
|
+* HTML Manuals and "make clean"::
|
|
+* Curses Notes::
|
|
+* Upgrading from Aspell 0.60.7::
|
|
+* Loadable Filter Notes::
|
|
+* Using 32-Bit Dictionaries on a 64-Bit System::
|
|
+* Upgrading from Aspell 0.50::
|
|
+* Upgrading from Aspell .33/Pspell .12::
|
|
+* Upgrading from a Pre-0.50 snapshot::
|
|
+* WIN32 Notes::
|
|
@end menu
|
|
|
|
@node Generic Install Instructions
|
|
@@ -120,6 +122,52 @@ In addition your system must also support the @code{mblen} function.
|
|
Although this function was defined in the ISO C89 standard (ANSI
|
|
X3.159-1989), not all systems have it.
|
|
|
|
+@node Upgrading from Aspell 0.60.7
|
|
+@appendixsec Upgrading from Aspell 0.60.7
|
|
+
|
|
+To prevent a potentially unbounded buffer over-read, Aspell no longer
|
|
+supports null-terminated UCS-2 and UCS-4 encoded strings with the
|
|
+original C API. Null-terminated 8-bit or UTF-8 encoded strings are
|
|
+still supported, as are UCS-2 and UCS-4 encoded strings when the
|
|
+length is passed in.
|
|
+
|
|
+As of Aspell 0.60.8 a function from the original API that expects an
|
|
+encoded string as a parameter will return meaningless results (or an
|
|
+error code) if the string is null terminated and the encoding is set to
|
|
+@code{ucs-2} or @code{ucs-4}. In addition, a single:
|
|
+@example
|
|
+ERROR: aspell_speller_check: Null-terminated wide-character strings unsupported when used this way.
|
|
+@end example
|
|
+will be printed to standard error the first time one of those
|
|
+functions is called.
|
|
+
|
|
+Applications that use null-terminated UCS-2/4 strings should either (1)
|
|
+use the interface intended for working with wide-characters
|
|
+(@pxref{Through the C API}); or (2) define
|
|
+@code{ASPELL_ENCODE_SETTING_SECURE} before including @code{aspell.h}.
|
|
+In the latter case it is important that the application explicitly
|
|
+sets the encoding to a known value. Defining
|
|
+@code{ASPELL_ENCODE_SETTING_SECURE} and not setting the encoding
|
|
+explicitly or allowing users of the application to set the encoding
|
|
+could result in an unbounded buffer over-read.
|
|
+
|
|
+If it is necessary to preserve binary compatibility with older
|
|
+versions of Aspell, the easiest thing would be to determine the length
|
|
+of the UCS-2/4 string---in bytes---and pass that in. Due to an
|
|
+implementation detail, existing API functions can be made to work with
|
|
+null-terminated UCS-2/4 strings safely by passing in either @code{-2}
|
|
+or @code{-4} (corresponding to the width of the character type) as the
|
|
+size. Doing so, however, will cause a buffer over-read for unpatched
|
|
+versions of Aspell. To avoid this it will be necessary to parse the
|
|
+version string to determine the correct value to use. However, no
|
|
+official support will be provided for the latter method.
|
|
+
|
|
+If the application can not be recompiled, then Aspell can be configured
|
|
+to preserve the old behavior by passing
|
|
+@option{--enable-sloppy-null-term-strings} to @command{configure}. When Aspell
|
|
+is compiled this way the version string will include the string
|
|
+@samp{ SLOPPY}.
|
|
+
|
|
@node Loadable Filter Notes
|
|
@appendixsec Loadable Filter Notes
|
|
|
|
@@ -129,8 +177,7 @@ errors when trying to use a filter, then it is likely that loadable
|
|
filter support is not working yet on your platform. Thus, in order to
|
|
get Aspell to work correctly you will need to avoid compiling the
|
|
filters as individual modules by using the
|
|
-@option{--enable-compile-in-filters} when configuring Aspell with
|
|
-@command{./configure}.
|
|
+@option{--enable-compile-in-filters} @command{configure} option.
|
|
|
|
@node Upgrading from Aspell 0.50
|
|
@appendixsec Upgrading from Aspell 0.50
|
|
diff --git a/test/cxx_warnings_test.cpp b/test/cxx_warnings_test.cpp
|
|
new file mode 100644
|
|
index 0000000..923bfc8
|
|
--- /dev/null
|
|
+++ b/test/cxx_warnings_test.cpp
|
|
@@ -0,0 +1,84 @@
|
|
+ #include <stdint.h>
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+#include <string.h>
|
|
+
|
|
+#include <aspell.h>
|
|
+
|
|
+const uint16_t test_word[] = {'c','a','f', 0x00E9, 0};
|
|
+const uint16_t test_incorrect[] = {'c','a','f', 'e', 0};
|
|
+const uint16_t test_doc[] = {'T', 'h', 'e', ' ', 'c','a','f', 0x00E9, '.', 0};
|
|
+
|
|
+int fail = 0;
|
|
+
|
|
+void f1() {  // UCS-2 run of the _w macros; sets `fail`, prints "ok" / "not ok"
|
|
+  AspellConfig * spell_config = new_aspell_config();
|
|
+  aspell_config_replace(spell_config, "master", "en_US-w_accents");
|
|
+  aspell_config_replace(spell_config, "encoding", "ucs-2");
|
|
+  AspellCanHaveError * possible_err = new_aspell_speller(spell_config);
|
|
+  AspellSpeller * spell_checker = 0;
|
|
+  if (aspell_error_number(possible_err) != 0) {
|
|
+    fprintf(stderr, "%s", aspell_error_message(possible_err));
|
|
+    exit(2);  // setup failure must not exit with a success status
|
|
+  } else {
|
|
+    spell_checker = to_aspell_speller(possible_err);
|
|
+  }
|
|
+  int correct = aspell_speller_check_w(spell_checker, test_word, -1);
|
|
+  if (!correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be correct\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  correct = aspell_speller_check_w(spell_checker, test_incorrect, -1);
|
|
+  if (correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be incorrect\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  const AspellWordList * suggestions = aspell_speller_suggest_w(spell_checker, test_incorrect, -1);
|
|
+  AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);
|
|
+  const uint16_t * word = aspell_string_enumeration_next_w(uint16_t, elements);
|
|
+  if (word == NULL || memcmp(word, test_word, sizeof(test_word)) != 0) {  // NULL: no suggestion
|
|
+    fprintf(stderr, "%s", "fail: first suggesion is not what is expected\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  delete_aspell_string_enumeration(elements);  // release on success and failure alike
|
|
+  if (fail)
|
|
+    printf("not ok\n");
|
|
+  else
|
|
+    printf("ok\n");
|
|
+}
|
|
+
|
|
+void f2() {  // UCS-2 run of the _w macros; sets `fail`, prints "ok" / "not ok"
|
|
+  AspellConfig * spell_config = new_aspell_config();
|
|
+  aspell_config_replace(spell_config, "master", "en_US-w_accents");
|
|
+  aspell_config_replace(spell_config, "encoding", "ucs-2");
|
|
+  AspellCanHaveError * possible_err = new_aspell_speller(spell_config);
|
|
+  AspellSpeller * spell_checker = 0;
|
|
+  if (aspell_error_number(possible_err) != 0) {
|
|
+    fprintf(stderr, "%s", aspell_error_message(possible_err));
|
|
+    exit(2);  // setup failure must not exit with a success status
|
|
+  } else {
|
|
+    spell_checker = to_aspell_speller(possible_err);
|
|
+  }
|
|
+  int correct = aspell_speller_check_w(spell_checker, test_word, -1);
|
|
+  if (!correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be correct\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  correct = aspell_speller_check_w(spell_checker, test_incorrect, -1);
|
|
+  if (correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be incorrect\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  const AspellWordList * suggestions = aspell_speller_suggest_w(spell_checker, test_incorrect, -1);
|
|
+  AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);
|
|
+  const uint16_t * word = aspell_string_enumeration_next_w(uint16_t, elements);
|
|
+  if (word == NULL || memcmp(word, test_word, sizeof(test_word)) != 0) {  // NULL: no suggestion
|
|
+    fprintf(stderr, "%s", "fail: first suggesion is not what is expected\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  delete_aspell_string_enumeration(elements);  // release on success and failure alike
|
|
+  if (fail)
|
|
+    printf("not ok\n");
|
|
+  else
|
|
+    printf("ok\n");
|
|
+}
|
|
diff --git a/test/wide_test_invalid.c b/test/wide_test_invalid.c
|
|
new file mode 100644
|
|
index 0000000..d5e2ed7
|
|
--- /dev/null
|
|
+++ b/test/wide_test_invalid.c
|
|
@@ -0,0 +1,69 @@
|
|
+#include <stdint.h>
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+#include <string.h>
|
|
+
|
|
+#include <aspell.h>
|
|
+
|
|
+const uint16_t test_word[] = {'c','a','f', 0x00E9, 0};
|
|
+const uint16_t test_incorrect[] = {'c','a','f', 'e', 0};
|
|
+const uint16_t test_doc[] = {'T', 'h', 'e', ' ', 'c','a','f', 'e', '.', 0};
|
|
+
|
|
+int fail = 0;
|
|
+
|
|
+int main() {  // non-wide API fed null-terminated UCS-2; exit 0 ok, 1 check failed, 2 setup error
|
|
+  AspellConfig * spell_config = new_aspell_config();
|
|
+  aspell_config_replace(spell_config, "master", "en_US-w_accents");
|
|
+  aspell_config_replace(spell_config, "encoding", "ucs-2");
|
|
+  AspellCanHaveError * possible_err = new_aspell_speller(spell_config);
|
|
+  AspellSpeller * spell_checker = 0;
|
|
+  if (aspell_error_number(possible_err) != 0) {
|
|
+    fprintf(stderr, "%s", aspell_error_message(possible_err));
|
|
+    return 2;
|
|
+  } else {
|
|
+    spell_checker = to_aspell_speller(possible_err);
|
|
+  }
|
|
+  int correct = aspell_speller_check(spell_checker, (const char *)test_word, -1);
|
|
+  if (!correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be correct\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  correct = aspell_speller_check(spell_checker, (const char *)test_incorrect, -1);
|
|
+  if (correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be incorrect\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  const AspellWordList * suggestions = aspell_speller_suggest(spell_checker, (const char *)test_incorrect, -1);
|
|
+  AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);
|
|
+  const char * word = aspell_string_enumeration_next(elements);
|
|
+  if (word == NULL || memcmp(word, test_word, sizeof(test_word)) != 0) {  // NULL: no suggestion
|
|
+    fprintf(stderr, "%s", "fail: first suggesion is not what is expected\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  delete_aspell_string_enumeration(elements);
|
|
+
|
|
+  possible_err = new_aspell_document_checker(spell_checker);
|
|
+  if (aspell_error(possible_err) != 0) {
|
|
+    fprintf(stderr, "Error: %s\n",aspell_error_message(possible_err));
|
|
+    return 2;
|
|
+  }
|
|
+  AspellDocumentChecker * checker = to_aspell_document_checker(possible_err);
|
|
+  aspell_document_checker_process(checker, (const char *)test_doc, -1);
|
|
+
|
|
+  AspellToken token = aspell_document_checker_next_misspelling(checker);
|
|
+  if (sizeof(test_incorrect) - sizeof(uint16_t) != token.len) {  // byte length of "cafe" without the terminator
|
|
+    fprintf(stderr, "fail: size of first misspelling (%d) is not what is expected (%lu)\n",
|
|
+            token.len, (unsigned long)(sizeof(test_incorrect) - sizeof(uint16_t)));  // size_t cast to match %lu
|
|
+    fail = 1;
|
|
+  } else if (memcmp(test_incorrect, (const char *)test_doc + token.offset, token.len) != 0) {
|
|
+    fprintf(stderr, "%s", "fail: first misspelling is not what is expected\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  if (fail) {
|
|
+    printf("not ok\n");
|
|
+    return 1;
|
|
+  } else {
|
|
+    printf("ok\n");
|
|
+    return 0;
|
|
+  }
|
|
+}
|
|
diff --git a/test/wide_test_valid.c b/test/wide_test_valid.c
|
|
new file mode 100644
|
|
index 0000000..bc3006d
|
|
--- /dev/null
|
|
+++ b/test/wide_test_valid.c
|
|
@@ -0,0 +1,69 @@
|
|
+#include <stdint.h>
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+#include <string.h>
|
|
+
|
|
+#include <aspell.h>
|
|
+
|
|
+const uint16_t test_word[] = {'c','a','f', 0x00E9, 0};
|
|
+const uint16_t test_incorrect[] = {'c','a','f', 'e', 0};
|
|
+const uint16_t test_doc[] = {'T', 'h', 'e', ' ', 'c','a','f', 'e', '.', 0};
|
|
+
|
|
+int fail = 0;
|
|
+
|
|
+int main() {  // wide (_w) API with null-terminated UCS-2; exit 0 ok, 1 check failed, 2 setup error
|
|
+  AspellConfig * spell_config = new_aspell_config();
|
|
+  aspell_config_replace(spell_config, "master", "en_US-w_accents");
|
|
+  aspell_config_replace(spell_config, "encoding", "ucs-2");
|
|
+  AspellCanHaveError * possible_err = new_aspell_speller(spell_config);
|
|
+  AspellSpeller * spell_checker = 0;
|
|
+  if (aspell_error_number(possible_err) != 0) {
|
|
+    fprintf(stderr, "%s", aspell_error_message(possible_err));
|
|
+    return 2;
|
|
+  } else {
|
|
+    spell_checker = to_aspell_speller(possible_err);
|
|
+  }
|
|
+  int correct = aspell_speller_check_w(spell_checker, test_word, -1);
|
|
+  if (!correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be correct\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  correct = aspell_speller_check_w(spell_checker, test_incorrect, -1);
|
|
+  if (correct) {
|
|
+    fprintf(stderr, "%s", "fail: expected word to be incorrect\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  const AspellWordList * suggestions = aspell_speller_suggest_w(spell_checker, test_incorrect, -1);
|
|
+  AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);
|
|
+  const uint16_t * word = aspell_string_enumeration_next_w(uint16_t, elements);
|
|
+  if (word == NULL || memcmp(word, test_word, sizeof(test_word)) != 0) {  // NULL: no suggestion
|
|
+    fprintf(stderr, "%s", "fail: first suggesion is not what is expected\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  delete_aspell_string_enumeration(elements);
|
|
+
|
|
+  possible_err = new_aspell_document_checker(spell_checker);
|
|
+  if (aspell_error(possible_err) != 0) {
|
|
+    fprintf(stderr, "Error: %s\n",aspell_error_message(possible_err));
|
|
+    return 2;
|
|
+  }
|
|
+  AspellDocumentChecker * checker = to_aspell_document_checker(possible_err);
|
|
+  aspell_document_checker_process_w(checker, test_doc, -1);
|
|
+
|
|
+  AspellToken token = aspell_document_checker_next_misspelling_w(uint16_t, checker);
|
|
+  if (4 != token.len) {  // length counted in uint16_t characters: "cafe"
|
|
+    fprintf(stderr, "fail: size of first misspelling (%d) is not what is expected (%d)\n",
|
|
+            token.len, 4);
|
|
+    fail = 1;
|
|
+  } else if (memcmp(test_incorrect, test_doc + token.offset, token.len * sizeof(uint16_t)) != 0) {  // chars -> bytes
|
|
+    fprintf(stderr, "%s", "fail: first misspelling is not what is expected\n");
|
|
+    fail = 1;
|
|
+  }
|
|
+  if (fail) {
|
|
+    printf("not ok\n");
|
|
+    return 1;
|
|
+  } else {
|
|
+    printf("ok\n");
|
|
+    return 0;
|
|
+  }
|
|
+}
|
|
--
|
|
2.27.0
|
|
|