1 | // Copyright Vladimir Prus 2002-2004. |
---|
2 | // Distributed under the Boost Software License, Version 1.0. |
---|
3 | // (See accompanying file LICENSE_1_0.txt |
---|
4 | // or copy at http://www.boost.org/LICENSE_1_0.txt) |
---|
5 | |
---|
6 | #include <string> |
---|
7 | #include <fstream> |
---|
8 | #include <sstream> |
---|
9 | #include <iostream> |
---|
10 | #include <boost/progress.hpp> |
---|
11 | #include <boost/bind.hpp> |
---|
12 | #include <boost/ref.hpp> |
---|
13 | |
---|
14 | #include <boost/program_options/detail/convert.hpp> |
---|
15 | #include <boost/program_options/detail/utf8_codecvt_facet.hpp> |
---|
16 | |
---|
17 | using namespace std; |
---|
18 | |
---|
// Reads the file named by 'filename' and returns everything in it as one
// string. The file is opened in the default (text) mode; a file that
// cannot be opened trips the assertion below.
std::string file_content(const std::string& filename)
{
    std::ifstream input(filename.c_str());
    assert(!input.fail());

    // Streaming the file's buffer copies its content up to end-of-file.
    std::ostringstream collected;
    collected << input.rdbuf();

    return collected.str();
}
---|
29 | |
---|
// A version of from_8_bit which does not use a function object, for
// performance comparison.
//
// Converts the narrow string 's' to a wide string using the facet 'cvt'
// and returns the result. Throws std::logic_error if the facet reports an
// error, or if a conversion step produces no output (which means the
// source data is incomplete).
std::wstring from_8_bit_2(const std::string& s,
                          const std::codecvt<wchar_t, char, std::mbstate_t>& cvt)
{
    std::wstring result;

    // Value-initialization gives the initial conversion state without
    // assuming anything about how mbstate_t is laid out ('= {0}' requires
    // an aggregate or scalar whose first member accepts 0).
    std::mbstate_t state = std::mbstate_t();

    const char* from = s.data();
    const char* const from_end = s.data() + s.size();

    // Conversion goes through a small scratch buffer and is appended to
    // 'result' piece by piece.
    wchar_t buffer[32];
    wchar_t* const buffer_end = buffer + sizeof(buffer) / sizeof(buffer[0]);

    // The interface of cvt is not really iterator-like, and it's not
    // possible to tell the required output size without doing the
    // conversion. All we can do is convert the data in pieces.
    while (from != from_end) {

        wchar_t* to_next = buffer;
        // Try to convert the remaining input; 'from' is advanced past
        // whatever was consumed.
        const std::codecvt_base::result r =
            cvt.in(state, from, from_end, from, buffer, buffer_end, to_next);

        if (r == std::codecvt_base::error)
            throw std::logic_error("character conversion failed");

        // 'partial' is not an error, it just means not all source
        // characters were converted. However, we need to check that at
        // least one new target character was produced. If not, it means
        // the source data is incomplete, and since we don't have extra data
        // to add to the source, it's an error.
        if (to_next == buffer)
            throw std::logic_error("character conversion failed");

        // Add the converted characters.
        result.append(buffer, to_next);
    }

    return result;
}
---|
72 | |
---|
73 | |
---|
74 | void test_convert(const std::string& input, |
---|
75 | const std::string& expected_output) |
---|
76 | { |
---|
77 | boost::program_options::detail::utf8_codecvt_facet<wchar_t, char> facet; |
---|
78 | |
---|
79 | std::wstring output; |
---|
80 | { |
---|
81 | boost::progress_timer t; |
---|
82 | for (int i = 0; i < 10000; ++i) |
---|
83 | output = boost::from_8_bit( |
---|
84 | input, |
---|
85 | facet); |
---|
86 | } |
---|
87 | |
---|
88 | { |
---|
89 | boost::progress_timer t; |
---|
90 | for (int i = 0; i < 10000; ++i) |
---|
91 | output = from_8_bit_2( |
---|
92 | input, |
---|
93 | facet); |
---|
94 | } |
---|
95 | |
---|
96 | BOOST_CHECK(output.size()*2 == expected_output.size()); |
---|
97 | |
---|
98 | for(unsigned i = 0; i < output.size(); ++i) { |
---|
99 | |
---|
100 | { |
---|
101 | unsigned low = output[i]; |
---|
102 | low &= 0xFF; |
---|
103 | unsigned low2 = expected_output[2*i]; |
---|
104 | low2 &= 0xFF; |
---|
105 | BOOST_CHECK(low == low2); |
---|
106 | } |
---|
107 | { |
---|
108 | unsigned high = output[i]; |
---|
109 | high >>= 8; |
---|
110 | high &= 0xFF; |
---|
111 | unsigned high2 = expected_output[2*i+1]; |
---|
112 | BOOST_CHECK(high == high2); |
---|
113 | } |
---|
114 | } |
---|
115 | |
---|
116 | string ref = boost::to_8_bit(output, facet); |
---|
117 | |
---|
118 | BOOST_CHECK(ref == input); |
---|
119 | } |
---|
120 | |
---|
121 | int test_main(int ac, char* av[]) |
---|
122 | { |
---|
123 | std::string input = file_content("utf8.txt"); |
---|
124 | std::string expected = file_content("ucs2.txt"); |
---|
125 | |
---|
126 | test_convert(input, expected); |
---|
127 | |
---|
128 | if (ac > 1) { |
---|
129 | cout << "Trying to convert the command line argument\n"; |
---|
130 | |
---|
131 | locale::global(locale("")); |
---|
132 | std::wstring w = boost::from_local_8_bit(av[1]); |
---|
133 | |
---|
134 | cout << "Got something, printing decimal code point values\n"; |
---|
135 | for (unsigned i = 0; i < w.size(); ++i) { |
---|
136 | cout << (unsigned)w[i] << "\n"; |
---|
137 | } |
---|
138 | |
---|
139 | } |
---|
140 | |
---|
141 | return 0; |
---|
142 | } |
---|