libstdc++
locale_conv.h
Go to the documentation of this file.
1 // wstring_convert implementation -*- C++ -*-
2 
3 // Copyright (C) 2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/locale_conv.h
26  * This is an internal header file, included by other library headers.
27  * Do not attempt to use it directly. @headername{locale}
28  */
29 
30 #ifndef _LOCALE_CONV_H
31 #define _LOCALE_CONV_H 1
32 
33 #if __cplusplus < 201103L
34 # include <bits/c++0x_warning.h>
35 #else
36 
37 #include <streambuf>
38 #include "stringfwd.h"
39 #include "allocator.h"
40 #include "codecvt.h"
41 #include "unique_ptr.h"
42 
43 namespace std _GLIBCXX_VISIBILITY(default)
44 {
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47 #ifdef _GLIBCXX_USE_WCHAR_T
48 
49  /**
50  * @addtogroup locales
51  * @{
52  */
53 
54 _GLIBCXX_BEGIN_NAMESPACE_CXX11
55  /// String conversions
56  template<typename _Codecvt, typename _Elem = wchar_t,
57  typename _Wide_alloc = allocator<_Elem>,
58  typename _Byte_alloc = allocator<char>>
60  {
61  public:
64  typedef typename _Codecvt::state_type state_type;
65  typedef typename wide_string::traits_type::int_type int_type;
66 
67  /** Default constructor.
68  *
69  * @param __pcvt The facet to use for conversions.
70  *
71  * Takes ownership of @p __pcvt and will delete it in the destructor.
72  */
73  explicit
74  wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
75  {
76  if (!_M_cvt)
77  __throw_logic_error("wstring_convert");
78  }
79 
80  /** Construct with an initial converstion state.
81  *
82  * @param __pcvt The facet to use for conversions.
83  * @param __state Initial conversion state.
84  *
85  * Takes ownership of @p __pcvt and will delete it in the destructor.
86  * The object's conversion state will persist between conversions.
87  */
88  wstring_convert(_Codecvt* __pcvt, state_type __state)
89  : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
90  {
91  if (!_M_cvt)
92  __throw_logic_error("wstring_convert");
93  }
94 
95  /** Construct with error strings.
96  *
97  * @param __byte_err A string to return on failed conversions.
98  * @param __wide_err A wide string to return on failed conversions.
99  */
100  explicit
101  wstring_convert(const byte_string& __byte_err,
102  const wide_string& __wide_err = wide_string())
103  : _M_cvt(new _Codecvt),
104  _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
105  _M_with_strings(true)
106  {
107  if (!_M_cvt)
108  __throw_logic_error("wstring_convert");
109  }
110 
111  ~wstring_convert() = default;
112 
113  // _GLIBCXX_RESOLVE_LIB_DEFECTS
114  // 2176. Special members for wstring_convert and wbuffer_convert
115  wstring_convert(const wstring_convert&) = delete;
116  wstring_convert& operator=(const wstring_convert&) = delete;
117 
118  /// @{ Convert from bytes.
119  wide_string
120  from_bytes(char __byte)
121  {
122  char __bytes[2] = { __byte };
123  return from_bytes(__bytes, __bytes+1);
124  }
125 
126  wide_string
127  from_bytes(const char* __ptr)
128  { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
129 
130  wide_string
131  from_bytes(const byte_string& __str)
132  {
133  auto __ptr = __str.data();
134  return from_bytes(__ptr, __ptr + __str.size());
135  }
136 
137  wide_string
138  from_bytes(const char* __first, const char* __last)
139  {
140  auto __errstr = _M_with_strings ? &_M_wide_err_string : nullptr;
141  _ConvFn<char, _Elem> __fn = &_Codecvt::in;
142  return _M_conv(__first, __last, __errstr, __fn);
143  }
144  /// @}
145 
146  /// @{ Convert to bytes.
147  byte_string
148  to_bytes(_Elem __wchar)
149  {
150  _Elem __wchars[2] = { __wchar };
151  return to_bytes(__wchars, __wchars+1);
152  }
153 
154  byte_string
155  to_bytes(const _Elem* __ptr)
156  {
157  return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
158  }
159 
160  byte_string
161  to_bytes(const wide_string& __wstr)
162  {
163  auto __ptr = __wstr.data();
164  return to_bytes(__ptr, __ptr + __wstr.size());
165  }
166 
167  byte_string
168  to_bytes(const _Elem* __first, const _Elem* __last)
169  {
170  auto __errstr = _M_with_strings ? &_M_byte_err_string : nullptr;
171  _ConvFn<_Elem, char> __fn = &_Codecvt::out;
172  return _M_conv(__first, __last, __errstr, __fn);
173  }
174  /// @}
175 
176  // _GLIBCXX_RESOLVE_LIB_DEFECTS
177  // 2174. wstring_convert::converted() should be noexcept
178  /// The number of elements successfully converted in the last conversion.
179  size_t converted() const noexcept { return _M_count; }
180 
181  /// The final conversion state of the last conversion.
182  state_type state() const { return _M_state; }
183 
184  private:
185  template<typename _InC, typename _OutC>
186  using _ConvFn
187  = codecvt_base::result
188  (_Codecvt::*)(state_type&, const _InC*, const _InC*, const _InC*&,
189  _OutC*, _OutC*, _OutC*&) const;
190 
191  template<typename _InChar, typename _OutStr, typename _MemFn>
192  _OutStr
193  _M_conv(const _InChar* __first, const _InChar* __last,
194  const _OutStr* __err, _MemFn __memfn)
195  {
196  auto __outstr = __err ? _OutStr(__err->get_allocator()) : _OutStr();
197 
198  if (__first == __last)
199  {
200  _M_count = 0;
201  return __outstr;
202  }
203 
204  if (!_M_with_cvtstate)
205  _M_state = state_type();
206 
207  size_t __outchars = 0;
208  auto __next = __first;
209  const auto __maxlen = _M_cvt->max_length() + 1;
210 
211  codecvt_base::result __result;
212  do
213  {
214  __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
215  auto __outnext = &__outstr.front() + __outchars;
216  auto const __outlast = &__outstr.back() + 1;
217  __result = ((*_M_cvt).*__memfn)(_M_state, __next, __last, __next,
218  __outnext, __outlast, __outnext);
219  __outchars = __outnext - &__outstr.front();
220  }
221  while (__result == codecvt_base::partial && __next != __last
222  && (__outstr.size() - __outchars) < __maxlen);
223 
224  if (__result == codecvt_base::noconv)
225  {
226  __outstr.assign(__first, __last);
227  _M_count = __outstr.size();
228  return __outstr;
229  }
230 
231  __outstr.resize(__outchars);
232  _M_count = __next - __first;
233 
234  if (__result != codecvt_base::error)
235  return __outstr;
236  else if (__err)
237  return *__err;
238  else
239  __throw_range_error("wstring_convert");
240  }
241 
242  unique_ptr<_Codecvt> _M_cvt;
243  byte_string _M_byte_err_string;
244  wide_string _M_wide_err_string;
245  state_type _M_state = state_type();
246  size_t _M_count = 0;
247  bool _M_with_cvtstate = false;
248  bool _M_with_strings = false;
249  };
250 _GLIBCXX_END_NAMESPACE_CXX11
251 
252  /// Buffer conversions
253  template<typename _Codecvt, typename _Elem = wchar_t,
254  typename _Tr = char_traits<_Elem>>
255  class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
256  {
257  typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
258 
259  public:
260  typedef typename _Codecvt::state_type state_type;
261 
262  /** Default constructor.
263  *
264  * @param __bytebuf The underlying byte stream buffer.
265  * @param __pcvt The facet to use for conversions.
266  * @param __state Initial conversion state.
267  *
268  * Takes ownership of @p __pcvt and will delete it in the destructor.
269  */
270  explicit
271  wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
272  state_type __state = state_type())
273  : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
274  {
275  if (!_M_cvt)
276  __throw_logic_error("wbuffer_convert");
277 
278  _M_always_noconv = _M_cvt->always_noconv();
279 
280  if (_M_buf)
281  {
282  this->setp(_M_put_area, _M_put_area + _S_buffer_length);
283  this->setg(_M_get_area + _S_putback_length,
284  _M_get_area + _S_putback_length,
285  _M_get_area + _S_putback_length);
286  }
287  }
288 
289  ~wbuffer_convert() = default;
290 
291  // _GLIBCXX_RESOLVE_LIB_DEFECTS
292  // 2176. Special members for wstring_convert and wbuffer_convert
293  wbuffer_convert(const wbuffer_convert&) = delete;
294  wbuffer_convert& operator=(const wbuffer_convert&) = delete;
295 
296  streambuf* rdbuf() const noexcept { return _M_buf; }
297 
298  streambuf*
299  rdbuf(streambuf *__bytebuf) noexcept
300  {
301  auto __prev = _M_buf;
302  _M_buf = __bytebuf;
303  return __prev;
304  }
305 
306  /// The conversion state following the last conversion.
307  state_type state() const noexcept { return _M_state; }
308 
309  protected:
310  int
311  sync()
312  { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; }
313 
314  typename _Wide_streambuf::int_type
315  overflow(typename _Wide_streambuf::int_type __out)
316  {
317  if (!_M_buf || !_M_conv_put())
318  return _Tr::eof();
319  else if (!_Tr::eq_int_type(__out, _Tr::eof()))
320  return this->sputc(__out);
321  return _Tr::not_eof(__out);
322  }
323 
324  typename _Wide_streambuf::int_type
325  underflow()
326  {
327  if (!_M_buf)
328  return _Tr::eof();
329 
330  if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
331  return _Tr::to_int_type(*this->gptr());
332  else
333  return _Tr::eof();
334  }
335 
336  streamsize
337  xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
338  {
339  if (!_M_buf || __n == 0)
340  return 0;
341  streamsize __done = 0;
342  do
343  {
344  auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
345  __n - __done);
346  _Tr::copy(this->pptr(), __s + __done, __nn);
347  this->pbump(__nn);
348  __done += __nn;
349  } while (__done < __n && _M_conv_put());
350  return __done;
351  }
352 
353  private:
354  // fill the get area from converted contents of the byte stream buffer
355  bool
356  _M_conv_get()
357  {
358  const streamsize __pb1 = this->gptr() - this->eback();
359  const streamsize __pb2 = _S_putback_length;
360  const streamsize __npb = std::min(__pb1, __pb2);
361 
362  _Tr::move(_M_get_area + _S_putback_length - __npb,
363  this->gptr() - __npb, __npb);
364 
365  streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
366  __nbytes = std::min(__nbytes, _M_buf->in_avail());
367  if (__nbytes < 1)
368  __nbytes == 1;
369  __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
370  if (__nbytes < 1)
371  return false;
372  __nbytes += _M_unconv;
373 
374  // convert _M_get_buf into _M_get_area
375 
376  _Elem* __outbuf = _M_get_area + _S_putback_length;
377  _Elem* __outnext = __outbuf;
378  const char* __bnext = _M_get_buf;
379 
380  codecvt_base::result __result;
381  if (_M_always_noconv)
382  __result = codecvt_base::noconv;
383  else
384  {
385  _Elem* __outend = _M_get_area + _S_buffer_length;
386 
387  __result = _M_cvt->in(_M_state,
388  __bnext, __bnext + __nbytes, __bnext,
389  __outbuf, __outend, __outnext);
390  }
391 
392  if (__result == codecvt_base::noconv)
393  {
394  // cast is safe because noconv means _Elem is same type as char
395  auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
396  _Tr::copy(__outbuf, __get_buf, __nbytes);
397  _M_unconv = 0;
398  return true;
399  }
400 
401  if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
402  char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
403 
404  this->setg(__outbuf, __outbuf, __outnext);
405 
406  return __result != codecvt_base::error;
407  }
408 
409  // unused
410  bool
411  _M_put(...)
412  { return false; }
413 
414  bool
415  _M_put(const char* __p, streamsize __n)
416  {
417  if (_M_buf->sputn(__p, __n) < __n)
418  return false;
419  }
420 
421  // convert the put area and write to the byte stream buffer
422  bool
423  _M_conv_put()
424  {
425  _Elem* const __first = this->pbase();
426  const _Elem* const __last = this->pptr();
427  const streamsize __pending = __last - __first;
428 
429  if (_M_always_noconv)
430  return _M_put(__first, __pending);
431 
432  char __outbuf[2 * _S_buffer_length];
433 
434  const _Elem* __next = __first;
435  const _Elem* __start;
436  do
437  {
438  __start = __next;
439  char* __outnext = __outbuf;
440  char* const __outlast = __outbuf + sizeof(__outbuf);
441  auto __result = _M_cvt->out(_M_state, __next, __last, __next,
442  __outnext, __outlast, __outnext);
443  if (__result == codecvt_base::error)
444  return false;
445  else if (__result == codecvt_base::noconv)
446  return _M_put(__next, __pending);
447 
448  if (!_M_put(__outbuf, __outnext - __outbuf))
449  return false;
450  }
451  while (__next != __last && __next != __start);
452 
453  if (__next != __last)
454  _Tr::move(__first, __next, __last - __next);
455 
456  this->pbump(__first - __next);
457  return __next != __first;
458  }
459 
460  streambuf* _M_buf;
461  unique_ptr<_Codecvt> _M_cvt;
462  state_type _M_state;
463 
464  static const streamsize _S_buffer_length = 32;
465  static const streamsize _S_putback_length = 3;
466  _Elem _M_put_area[_S_buffer_length];
467  _Elem _M_get_area[_S_buffer_length];
468  streamsize _M_unconv = 0;
469  char _M_get_buf[_S_buffer_length-_S_putback_length];
470  bool _M_always_noconv;
471  };
472 
473  /// @} group locales
474 
475 #endif // _GLIBCXX_USE_WCHAR_T
476 
477 _GLIBCXX_END_NAMESPACE_VERSION
478 } // namespace
479 
480 #endif // __cplusplus
481 
482 #endif /* _LOCALE_CONV_H */
byte_string to_bytes(const _Elem *__ptr)
Convert to bytes.
Definition: locale_conv.h:155
wstring_convert(const byte_string &__byte_err, const wide_string &__wide_err=wide_string())
Definition: locale_conv.h:101
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
Definition: move.h:101
byte_string to_bytes(const _Elem *__first, const _Elem *__last)
Convert to bytes.
Definition: locale_conv.h:168
state_type state() const
The final conversion state of the last conversion.
Definition: locale_conv.h:182
state_type state() const noexcept
The conversion state following the last conversion.
Definition: locale_conv.h:307
wstring_convert(_Codecvt *__pcvt=new _Codecvt())
Definition: locale_conv.h:74
ptrdiff_t streamsize
Integral type for I/O operation counts and buffer sizes.
Definition: postypes.h:98
Managing sequences of characters and character-like objects.
_GLIBCXX14_CONSTEXPR const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:195
Buffer conversions.
Definition: locale_conv.h:255
wbuffer_convert(streambuf *__bytebuf=0, _Codecvt *__pcvt=new _Codecvt, state_type __state=state_type())
Definition: locale_conv.h:271
byte_string to_bytes(const wide_string &__wstr)
Convert to bytes.
Definition: locale_conv.h:161
wide_string from_bytes(const char *__first, const char *__last)
Convert from bytes.
Definition: locale_conv.h:138
size_t converted() const noexcept
The number of elements successfully converted in the last conversion.
Definition: locale_conv.h:179
size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
wide_string from_bytes(char __byte)
Convert from bytes.
Definition: locale_conv.h:120
byte_string to_bytes(_Elem __wchar)
Convert to bytes.
Definition: locale_conv.h:148
wide_string from_bytes(const byte_string &__str)
Convert from bytes.
Definition: locale_conv.h:131
ISO C++ entities toplevel namespace is std.
wstring_convert(_Codecvt *__pcvt, state_type __state)
Definition: locale_conv.h:88
const _CharT * data() const noexcept
Return const pointer to contents.
String conversions.
Definition: locale_conv.h:59
wide_string from_bytes(const char *__ptr)
Convert from bytes.
Definition: locale_conv.h:127
Basis for explicit traits specializations.
Definition: char_traits.h:227