Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: downloads/boost_1_34_1/libs/iostreams/doc/tutorial/unix2dos_filters.html @ 29

Last change on this file since 29 was 29, checked in by landauf, 16 years ago

updated boost from 1_33_1 to 1_34_1

File size: 13.8 KB
Line 
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2<HTML>
3<HEAD>
4    <TITLE>Tutorial</TITLE>
5    <LINK REL="stylesheet" HREF="../../../../boost.css">
6    <LINK REL="stylesheet" HREF="../theme/iostreams.css">
7</HEAD>
8<BODY>
9
10<!-- Begin Banner -->
11
12    <H1 CLASS="title">Tutorial</H1>
13    <HR CLASS="banner">
14
15<!-- End Banner -->
16
17<!-- Begin Nav -->
18
19<DIV CLASS='nav'>
20    <A HREF='dictionary_filters.html'><IMG BORDER=0 WIDTH=19 HEIGHT=19 SRC='../../../../doc/html/images/prev.png'></A>
21    <A HREF='tutorial.html'><IMG BORDER=0 WIDTH=19 HEIGHT=19 SRC='../../../../doc/html/images/up.png'></A>
22    <A HREF='multichar_filters.html'><IMG BORDER=0 WIDTH=19 HEIGHT=19 SRC='../../../../doc/html/images/next.png'></A>
23</DIV>
24
25<!-- End Nav -->
26
27<A NAME="unix2dos"></A>
28<H2>2.2.7. UNIX-to-DOS Filters</H2>
29
30<P>Suppose you want to write a Filter to convert <CODE>UNIX</CODE> line endings to <CODE>DOS</CODE> line-endings. The basic idea is simple: you process the characters in a sequence one at a time, and whenever you encounter the character
31<CODE>'\n'</CODE> you replace it with the two-character sequence <CODE>'\r'</CODE>, <CODE>'\n'</CODE>. In the following sections I'll implement this algorithm as a <CODE>stdio_filter</CODE>, an InputFilter and an OutputFilter. The source code can be found in the header <A HREF="../../example/unix2dos_filter.hpp">&lt;<CODE>libs/iostreams/example/unix2dos_filter.hpp</CODE>&gt;</A>.</P>
32
33<A NAME="unix2dos_stdio_filter"></A>
34<H4><CODE>unix2dos_stdio_filter</CODE></H4>
35
36<P>You can express a <CODE>UNIX</CODE>-to-<CODE>DOS</CODE> Filter as a <CODE>stdio_filter</CODE> by deriving from <CODE>stdio_filter</CODE> and overriding the <CODE>private</CODE> <CODE>virtual</CODE> function <CODE>do_filter</CODE> as follows:</P>
37
38<PRE CLASS="broken_ie"><SPAN CLASS='preprocessor'>#include</SPAN> <SPAN CLASS='literal'>&lt;cstdio&gt;</SPAN>    <SPAN CLASS='comment'>// EOF</SPAN>
39<SPAN CLASS='preprocessor'>#include</SPAN> <SPAN CLASS='literal'>&lt;iostream&gt;</SPAN>  <SPAN CLASS='comment'>// cin, cout</SPAN>
40<SPAN CLASS='preprocessor'>#include</SPAN> <A CLASS="header" HREF="../../../../boost/iostreams/filter/stdio.hpp"><SPAN CLASS='literal'>&lt;boost/iostreams/filter/stdio.hpp&gt;</SPAN></A>
41
42<SPAN CLASS='keyword'>namespace</SPAN> boost { <SPAN CLASS='keyword'>namespace</SPAN> iostreams { <SPAN CLASS='keyword'>namespace</SPAN> example {
43
44<SPAN CLASS="keyword">class</SPAN> unix2dos_stdio_filter : <SPAN CLASS="keyword">public</SPAN> stdio_filter {
45<SPAN CLASS="keyword">private</SPAN>:
46    <SPAN CLASS="keyword">void</SPAN> do_filter()
47    {
48        <SPAN CLASS="keyword">int</SPAN> c;
49        <SPAN CLASS="keyword">while</SPAN> ((c = std::cin.get()) != <SPAN CLASS='numeric_literal'>EOF</SPAN>) {
50            <SPAN CLASS="keyword">if</SPAN> (c == <SPAN CLASS='literal'>'\n'</SPAN>)
51                std::cout.put(<SPAN CLASS='literal'>'\r'</SPAN>);
52            std::cout.put(c);
53        }
54    }
55};
56
57} } } <SPAN CLASS='comment'>// End namespace boost::iostreams::example</SPAN></PRE>
58
59<P>The function <CODE>do_filter</CODE> consists of a straightforward implementation of the algorithm I described above: it reads characters from standard input and writes them to standard output unchanged, except that when it encounters <CODE>'\n'</CODE> it writes <CODE>'\r'</CODE>, <CODE>'\n'</CODE>.</P>
60
61<A NAME="unix2dos_input_filter"></A>
62<H4><CODE>unix2dos_input_filter</CODE></H4>
63
64<P>Now, let's express a <CODE>UNIX</CODE>-to-<CODE>DOS</CODE> Filter as an <A HREF="../concepts/input_filter.html">InputFilter</A>.</P>
65
66<PRE CLASS="broken_ie"><SPAN CLASS='preprocessor'>#include</SPAN> <A CLASS="header" HREF="../../../../boost/iostreams/categories.hpp"><SPAN CLASS='literal'>&lt;boost/iostreams/categories.hpp&gt;</SPAN></A> <SPAN CLASS='comment'>// input_filter_tag</SPAN>
67<SPAN CLASS='preprocessor'>#include</SPAN> <A CLASS="header" HREF="../../../../boost/iostreams/operations.hpp"><SPAN CLASS='literal'>&lt;boost/iostreams/operations.hpp&gt;</SPAN></A> <SPAN CLASS='comment'>// get</SPAN>
68
69<SPAN CLASS='keyword'>namespace</SPAN> boost { <SPAN CLASS='keyword'>namespace</SPAN> iostreams { <SPAN CLASS='keyword'>namespace</SPAN> example {
70
71<SPAN CLASS="keyword">class</SPAN> unix2dos_input_filter {
72<SPAN CLASS="keyword">public</SPAN>:
73    <SPAN CLASS='keyword'>typedef</SPAN> <SPAN CLASS='keyword'>char</SPAN>              char_type;
74    <SPAN CLASS='keyword'>typedef</SPAN> input_filter_tag  category;
75
76    unix2dos_input_filter() : has_linefeed_(<SPAN CLASS='keyword'>false</SPAN>) { }
77
78    <SPAN CLASS="keyword">template</SPAN>&lt;<SPAN CLASS="keyword">typename</SPAN> Source&gt;
79    <SPAN CLASS="keyword">int</SPAN> get(Source& src)
80    {
81        <SPAN CLASS='comment'>// Handle unfinished business</SPAN>
82        <SPAN CLASS="keyword">if</SPAN> (has_linefeed_) {
83            has_linefeed_ = <SPAN CLASS="keyword">false</SPAN>;
84            <SPAN CLASS="keyword">return</SPAN> <SPAN CLASS="literal">'\n'</SPAN>;
85        }
86
87        <SPAN CLASS='comment'>// Forward all characters except '\n'</SPAN>
88        <SPAN CLASS="keyword">int</SPAN> c;
89        <SPAN CLASS="keyword">if</SPAN> ((c = iostreams::get(src)) == <SPAN CLASS='literal'>'\n'</SPAN>) {
90            has_linefeed_ = <SPAN CLASS="keyword">true</SPAN>;
91            <SPAN CLASS="keyword">return</SPAN> <SPAN CLASS='literal'>'\r'</SPAN>;
92        }
93
94        <SPAN CLASS="keyword">return</SPAN> c;
95    }
96
97    <SPAN CLASS="keyword">template</SPAN>&lt;<SPAN CLASS="keyword">typename</SPAN> Source&gt;
98    <SPAN CLASS="keyword">void</SPAN> close(Source&);
99<SPAN CLASS="keyword">private</SPAN>:
100    <SPAN CLASS="keyword">bool</SPAN> has_linefeed_;
101};
102
103} } } <SPAN CLASS='comment'>// End namespace boost::iostreams::example</SPAN></PRE>
104
105<P>The implementation of <CODE>get</CODE> can be described as follows. Most of the time, you simply read a character from <CODE>src</CODE> and return it. The special values <CODE>EOF</CODE> and <CODE>WOULD_BLOCK</CODE> are treated the same way: they are simply forwarded <I>as-is</I>. The exception is when <CODE>iostreams::get</CODE> returns <CODE>'\n'</CODE>. In this case, you return <CODE>'\r'</CODE> instead and make a note to return <CODE>'\n'</CODE> the next time <CODE>get</CODE> is called.</P>
106
107<P>As usual, the member function <CODE>close</CODE> resets the Filter's state:</P>
108
109<PRE CLASS="broken_ie">    <SPAN CLASS="keyword">template</SPAN>&lt;<SPAN CLASS="keyword">typename</SPAN> Source&gt;
110    <SPAN CLASS="keyword">void</SPAN> close(Source&) { has_linefeed_ = <SPAN CLASS="keyword">false</SPAN>; }</PRE>
111
112<A NAME="unix2dos_output_filter"></A>
113<H4><CODE>unix2dos_output_filter</CODE></H4>
114
115<P>You can express a <CODE>UNIX</CODE>-to-<CODE>DOS</CODE> Filter as an <A HREF="../concepts/output_filter.html">OutputFilter</A> as follows:</P>
116
117<PRE CLASS="broken_ie"><SPAN CLASS='preprocessor'>#include</SPAN> <A CLASS="header" HREF="../../../../boost/iostreams/concepts.hpp"><SPAN CLASS='literal'>&lt;boost/iostreams/concepts.hpp&gt;</SPAN></A>   <SPAN CLASS='comment'>// output_filter</SPAN>
118<SPAN CLASS='preprocessor'>#include</SPAN> <A CLASS="header" HREF="../../../../boost/iostreams/operations.hpp"><SPAN CLASS='literal'>&lt;boost/iostreams/operations.hpp&gt;</SPAN></A> <SPAN CLASS='comment'>// put</SPAN>
119
120<SPAN CLASS='keyword'>namespace</SPAN> boost { <SPAN CLASS='keyword'>namespace</SPAN> iostreams { <SPAN CLASS='keyword'>namespace</SPAN> example {
121
122<SPAN CLASS="keyword">class</SPAN> unix2dos_output_filter : <SPAN CLASS="keyword">public</SPAN> output_filter {
123<SPAN CLASS="keyword">public</SPAN>:
124    unix2dos_output_filter() : has_linefeed_(<SPAN CLASS="keyword">false</SPAN>) { }
125
126    <SPAN CLASS="keyword">template</SPAN>&lt;<SPAN CLASS="keyword">typename</SPAN> Sink&gt;
127    <SPAN CLASS="keyword">bool</SPAN> put(Sink& dest, <SPAN CLASS="keyword">int</SPAN> c);
128
129    <SPAN CLASS="keyword">template</SPAN>&lt;<SPAN CLASS="keyword">typename</SPAN> Sink&gt;
130    <SPAN CLASS="keyword">void</SPAN> close(Sink&) { has_linefeed_ = <SPAN CLASS="keyword">false</SPAN>; }
131<SPAN CLASS="keyword">private</SPAN>:
132    <SPAN CLASS="keyword">template</SPAN>&lt;<SPAN CLASS="keyword">typename</SPAN> Sink&gt;
133    <SPAN CLASS="keyword">bool</SPAN> put_char(Sink& dest, <SPAN CLASS="keyword">int</SPAN> c);
134
135    <SPAN CLASS="keyword">bool</SPAN> has_linefeed_;
136};
137
138} } } <SPAN CLASS='comment'>// End namespace boost::iostreams::example</SPAN></PRE>
139
140<P>
141    Here I've derived from the helper class <A HREF="../classes/filter.html#synopsis"><CODE>output_filter</CODE></A>, which provides a member type <CODE>char_type</CODE> equal to <CODE>char</CODE> and a category tag convertible to <A HREF="../guide/traits.html#category_tags"><CODE>output_filter_tag</CODE></A> and to <A HREF="../guide/traits.html#category_tags"><CODE>closable_tag</CODE></A>.
142</P>
143
144<P>Let's look first at the helper function <CODE>put_char</CODE>:</P>
145
146<PRE CLASS='broken_ie'>    <SPAN CLASS="keyword">template</SPAN>&lt;<SPAN CLASS="keyword">typename</SPAN> Sink&gt;
147    <SPAN CLASS="keyword">bool</SPAN> put_char(Sink& dest, <SPAN CLASS="keyword">int</SPAN> c)
148    {
149        <SPAN CLASS="keyword">bool</SPAN> result;
150        <SPAN CLASS="keyword">if</SPAN> ((result = iostreams::put(dest, c)) == <SPAN CLASS="keyword">true</SPAN>) {
151            has_linefeed_ =
152                c == <SPAN CLASS="literal">'\r'</SPAN> ?
153                    <SPAN CLASS="keyword">true</SPAN> :
154                    c == <SPAN CLASS="literal">'\n'</SPAN> ?
155                        <SPAN CLASS="keyword">false</SPAN> :
156                        has_linefeed_;
157        }
158        <SPAN CLASS="keyword">return</SPAN> result;
159    }</PRE>
160
161<P>
162    This function attempts to write a single character to the Sink dest, returning <CODE>true</CODE> for success. If successful, it updates the flag <CODE>has_linefeed_</CODE>, which indicates that an attempt to write a <CODE>DOS</CODE> line ending sequence failed after the first character was written. 
163</P>
164
165<P>Using <CODE>put_char</CODE> you can implement <CODE>put</CODE> as follows:</P>
166
167<PRE CLASS='broken_ie'>    <SPAN CLASS="keyword">bool</SPAN> put(Sink& dest, <SPAN CLASS="keyword">int</SPAN> c)
168    {
169        <SPAN CLASS="keyword">if</SPAN> (c == <SPAN CLASS="literal">'\n'</SPAN>)
170            <SPAN CLASS="keyword">return</SPAN> has_linefeed_ ?
171                put_char(dest, <SPAN CLASS="literal">'\n'</SPAN>) :
172                put_char(dest, <SPAN CLASS="literal">'\r'</SPAN>) ?
173                    this-&gt;put(dest, <SPAN CLASS="literal">'\n'</SPAN>) :
174                    <SPAN CLASS="keyword">false</SPAN>;
175        <SPAN CLASS="keyword">return</SPAN> iostreams::put(dest, c);
176    }</PRE>
177
178<P>The implementation works like so:</P>
179
180<OL>
181<LI>
182    If you're at the beginning of a <CODE>DOS</CODE> line-ending sequence &#8212; that is, if <CODE>c</CODE> is <CODE>'\n'</CODE> and <CODE>has_linefeed_</CODE> is <CODE>false</CODE> &#8212; you attempt to write <CODE>'\r'</CODE> and then <CODE>'\n'</CODE> to <CODE>dest</CODE>.
183</LI>
184<LI>
185    If you're in the middle of a <CODE>DOS</CODE> line-ending sequence &#8212; that is, if <CODE>c</CODE> is <CODE>'\n'</CODE> and <CODE>has_linefeed_</CODE> is <CODE>true</CODE> &#8212; you attempt to complete it by writing <CODE>'\n'</CODE>.
186</LI>
187<LI>
188    Otherwise, you attempt to write <CODE>c</CODE> to <CODE>dest</CODE>.
189</LI>
190</OL>
191
192<P>
193    There are two subtle points. First, why do <CODE>c == '\n'</CODE> and <CODE>has_linefeed_ == true</CODE> mean that you're in the middle of a <CODE>DOS</CODE> line-ending sequence? Because when you attempt to write <CODE>'\r'</CODE>, <CODE>'\n'</CODE> but only the first character succeeds, you set <CODE>has_linefeed_</CODE> and return <CODE>false</CODE>. This causes the user of the Filter to <I>resend</I> the character <CODE>'\n'</CODE>, which triggered the line-ending sequence. Second, note that to write the second character of a line-ending sequence you call <CODE>put</CODE> recursively instead of calling <CODE>put_char</CODE>.
194</P>
195
196<P>
197    Comparing the implementations of <CODE>unix2dos_input_filter</CODE> and <CODE>unix2dos_output_filter</CODE>, you can see that this is a case where a filtering algorithm is much easier to express as an InputFilter than as an OutputFilter. If you wanted to avoid the complexity of the above definition, you could use the class template <A HREF="../functions/invert.html#invert"><CODE>inverse</CODE></A> to construct an OutputFilter from <CODE>unix2dos_input_filter</CODE>:
198</P>
199
200<PRE CLASS="broken_ie"><SPAN CLASS='preprocessor'>#include</SPAN> <A CLASS="header" HREF="../../../../boost/iostreams/invert.hpp"><SPAN CLASS='literal'>&lt;boost/iostreams/invert.hpp&gt;</SPAN></A>   <SPAN CLASS='comment'>// inverse</SPAN>
201
202<SPAN CLASS='keyword'>namespace</SPAN> io = boost::iostreams;
203<SPAN CLASS='keyword'>namespace</SPAN> ex = boost::iostreams::example;
204
205<SPAN CLASS="keyword">typedef</SPAN> io::inverse&lt;ex::unix2dos_input_filter&gt; unix2dos_output_filter;</PRE>
206
207<P>Even this is more work than necessary, however, since line-ending conversions can be handled easily with the built-in component <A HREF="../classes/newline_filter.html#newline_filter"><CODE>newline_filter</CODE></A>.</P>
208
209<!-- Begin Nav -->
210
211<DIV CLASS='nav'>
212    <A HREF='dictionary_filters.html'><IMG BORDER=0 WIDTH=19 HEIGHT=19 SRC='../../../../doc/html/images/prev.png'></A>
213    <A HREF='tutorial.html'><IMG BORDER=0 WIDTH=19 HEIGHT=19 SRC='../../../../doc/html/images/up.png'></A>
214    <A HREF='multichar_filters.html'><IMG BORDER=0 WIDTH=19 HEIGHT=19 SRC='../../../../doc/html/images/next.png'></A>
215</DIV>
216
217<!-- End Nav -->
218
219<!-- Begin Footer -->
220
221<HR>
222
223<P CLASS="copyright">Revised
224<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
22520 May, 2004
226<!--webbot bot="Timestamp" endspan i-checksum="38504" -->
227</P>
228
229<P CLASS="copyright">&copy; Copyright <A HREF="http://www.kangaroologic.com" TARGET="_top">Jonathan Turkanis</A>, 2004</P>
230<P CLASS="copyright"> 
231    Use, modification, and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file <A HREF="../../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A> or copy at <A HREF="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)
232</P>
233<!-- End Footer -->
234
235</BODY>
Note: See TracBrowser for help on using the repository browser.