1 | #ifndef DATE_TIME_TZ_DB_BASE_HPP__ |
---|
2 | #define DATE_TIME_TZ_DB_BASE_HPP__ |
---|
3 | |
---|
4 | /* Copyright (c) 2003-2005 CrystalClear Software, Inc. |
---|
5 | * Subject to the Boost Software License, Version 1.0. |
---|
6 | * (See accompanying file LICENSE-1.0 or http://www.boost.org/LICENSE-1.0) |
---|
7 | * Author: Jeff Garland, Bart Garst |
---|
8 | * $Date: 2005/10/23 20:15:06 $ |
---|
9 | */ |
---|
10 | |
---|
#include "boost/shared_ptr.hpp"
#include "boost/date_time/time_zone_names.hpp"
#include "boost/date_time/time_zone_base.hpp"
#include "boost/date_time/time_parsing.hpp"
#include "boost/tokenizer.hpp"
#include <cstdlib>
#include <fstream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
---|
22 | |
---|
23 | namespace boost { |
---|
24 | namespace date_time { |
---|
25 | |
---|
//! Exception raised when the tz database cannot find or open its data file
/*! Derives from std::logic_error; what() carries a fixed explanatory
 *  message, optionally followed by the file specification that failed. */
class data_not_accessible : public std::logic_error
{
public:
  //! Builds the exception with the generic message only
  data_not_accessible()
    : std::logic_error(
        std::string("Unable to locate or access the required datafile."))
  {
  }

  //! Builds the exception and appends the offending file specification
  data_not_accessible(const std::string& filespec)
    : std::logic_error(
        std::string("Unable to locate or access the required datafile. Filespec: ")
          + filespec)
  {
  }
};
---|
37 | |
---|
//! Exception raised when a data file record has the wrong number of fields
/*! Derives from std::out_of_range; the text passed to the constructor is
 *  forwarded unchanged and becomes the what() message. */
class bad_field_count : public std::out_of_range
{
public:
  //! Builds the exception with the caller-supplied description \a s
  bad_field_count(const std::string& s)
    : std::out_of_range(s)
  {
  }
};
---|
46 | |
---|
47 | //! Creates a database of time_zones from csv datafile |
---|
48 | /*! The csv file containing the zone_specs used by the |
---|
49 | * tz_db_base is intended to be customized by the |
---|
50 | * library user. When customizing this file (or creating your own) the |
---|
51 | * file must follow a specific format. |
---|
52 | * |
---|
53 | * This first line is expected to contain column headings and is therefore |
---|
54 | * not processed by the tz_db_base. |
---|
55 | * |
---|
56 | * Each record (line) must have eleven fields. Some of those fields can |
---|
57 | * be empty. Every field (even empty ones) must be enclosed in |
---|
58 | * double-quotes. |
---|
59 | * Ex: |
---|
60 | * @code |
---|
61 | * "America/Phoenix" <- string enclosed in quotes |
---|
62 | * "" <- empty field |
---|
63 | * @endcode |
---|
64 | * |
---|
65 | * Some fields represent a length of time. The format of these fields |
---|
66 | * must be: |
---|
67 | * @code |
---|
68 | * "{+|-}hh:mm[:ss]" <- length-of-time format |
---|
69 | * @endcode |
---|
70 | * Where the plus or minus is mandatory and the seconds are optional. |
---|
71 | * |
---|
72 | * Since some time zones do not use daylight savings it is not always |
---|
73 | * necessary for every field in a zone_spec to contain a value. All |
---|
74 | * zone_specs must have at least ID and GMT offset. Zones that use |
---|
75 | * daylight savings must have all fields filled except: |
---|
76 | * STD ABBR, STD NAME, DST NAME. You should take note |
---|
77 | * that DST ABBR is mandatory for zones that use daylight savings |
---|
78 | * (see field descriptions for further details). |
---|
79 | * |
---|
80 | * ******* Fields and their description/details ********* |
---|
81 | * |
---|
82 | * ID: |
---|
83 | * Contains the identifying string for the zone_spec. Any string will |
---|
84 | * do as long as it's unique. No two IDs can be the same. |
---|
85 | * |
---|
86 | * STD ABBR: |
---|
87 | * STD NAME: |
---|
88 | * DST ABBR: |
---|
89 | * DST NAME: |
---|
90 | * These four are all the names and abbreviations used by the time |
---|
91 | * zone being described. While any string will do in these fields, |
---|
92 | * care should be taken. These fields hold the strings that will be |
---|
93 | * used in the output of many of the local_time classes. |
---|
94 | * Ex: |
---|
95 | * @code |
---|
96 | * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); |
---|
97 | * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); |
---|
98 | * cout << ny_time.to_long_string() << endl; |
---|
99 | * // 2004-Aug-30 00:00:00 Eastern Daylight Time |
---|
100 | * cout << ny_time.to_short_string() << endl; |
---|
101 | * // 2004-Aug-30 00:00:00 EDT |
---|
102 | * @endcode |
---|
103 | * |
---|
104 | * NOTE: The exact format/function names may vary - see local_time |
---|
105 | * documentation for further details. |
---|
106 | * |
---|
107 | * GMT offset: |
---|
108 | * This is the number of hours added to utc to get the local time |
---|
109 | * before any daylight savings adjustments are made. Some examples |
---|
110 | * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. |
---|
111 | * The format must follow the length-of-time format described above. |
---|
112 | * |
---|
113 | * DST adjustment: |
---|
114 | * The amount of time added to gmt_offset when daylight savings is in |
---|
115 | * effect. The format must follow the length-of-time format described |
---|
116 | * above. |
---|
117 | * |
---|
118 | * DST Start Date rule: |
---|
119 | * This is a specially formatted string that describes the day of year |
---|
120 | * on which the transition takes place. It holds three fields of its own, |
---|
121 | * separated by semicolons. |
---|
122 | * The first field indicates the "nth" weekday of the month. The possible |
---|
123 | * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), |
---|
124 | * and -1 (last). |
---|
125 | * The second field indicates the day-of-week from 0-6 (Sun=0). |
---|
126 | * The third field indicates the month from 1-12 (Jan=1). |
---|
127 | * |
---|
128 | * Examples are: "-1;5;9"="Last Friday of September", |
---|
129 | * "2;1;3"="Second Monday of March" |
---|
130 | * |
---|
131 | * Start time: |
---|
132 | * Start time is the number of hours past midnight, on the day of the |
---|
133 | * start transition, the transition takes place. More simply put, the |
---|
134 | * time of day the transition is made (in 24 hours format). The format |
---|
135 | * must follow the length-of-time format described above with the |
---|
136 | * exception that it must always be positive. |
---|
137 | * |
---|
138 | * DST End date rule: |
---|
139 | * See DST Start date rule. The difference here is this is the day |
---|
140 | * daylight savings ends (transition to STD). |
---|
141 | * |
---|
142 | * End time: |
---|
143 | * Same as Start time. |
---|
144 | */ |
---|
145 | template<class time_zone_type, class rule_type> |
---|
146 | class tz_db_base { |
---|
147 | public: |
---|
148 | /* Having CharT as a template parameter created problems |
---|
149 | * with posix_time::duration_from_string. Templatizing |
---|
150 | * duration_from_string was not possible at this time, however, |
---|
151 | * it should be possible in the future (when poor compilers get |
---|
152 | * fixed or stop being used). |
---|
153 | * Since this class was designed to use CharT as a parameter it |
---|
154 | * is simply typedef'd here to ease converting in back to a |
---|
155 | * parameter the future */ |
---|
156 | typedef char char_type; |
---|
157 | |
---|
158 | typedef typename time_zone_type::base_type time_zone_base_type; |
---|
159 | typedef typename time_zone_type::time_duration_type time_duration_type; |
---|
160 | typedef time_zone_names_base<char_type> time_zone_names; |
---|
161 | typedef dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; |
---|
162 | typedef std::basic_string<char_type> string_type; |
---|
163 | |
---|
164 | //! Constructs an empty database |
---|
165 | tz_db_base() {} |
---|
166 | |
---|
167 | //! Process csv data file, may throw exceptions |
---|
168 | /*! May throw data_not_accessible, or bad_field_count exceptions */ |
---|
169 | void load_from_file(const std::string& pathspec) |
---|
170 | { |
---|
171 | string_type in_str; |
---|
172 | std::string buff; |
---|
173 | |
---|
174 | std::ifstream ifs(pathspec.c_str()); |
---|
175 | if(!ifs){ |
---|
176 | throw data_not_accessible(pathspec); |
---|
177 | } |
---|
178 | std::getline(ifs, buff); // first line is column headings |
---|
179 | |
---|
180 | while( std::getline(ifs, buff)) { |
---|
181 | parse_string(buff); |
---|
182 | } |
---|
183 | } |
---|
184 | |
---|
185 | //! returns true if record successfully added to map |
---|
186 | /*! Takes an id string in the form of "America/Phoenix", and a |
---|
187 | * time_zone object for that region. The id string must be a unique |
---|
188 | * name that does not already exist in the database. */ |
---|
189 | bool add_record(const string_type& id, |
---|
190 | boost::shared_ptr<time_zone_base_type> tz) |
---|
191 | { |
---|
192 | typename map_type::value_type p(id, tz); |
---|
193 | return (m_zone_map.insert(p)).second; |
---|
194 | } |
---|
195 | |
---|
196 | //! Returns a time_zone object built from the specs for the given region |
---|
197 | /*! Returns a time_zone object built from the specs for the given |
---|
198 | * region. If region does not exist a local_time::record_not_found |
---|
199 | * exception will be thrown */ |
---|
200 | boost::shared_ptr<time_zone_base_type> |
---|
201 | time_zone_from_region(const string_type& region) const |
---|
202 | { |
---|
203 | // get the record |
---|
204 | typename map_type::const_iterator record = m_zone_map.find(region); |
---|
205 | if(record == m_zone_map.end()){ |
---|
206 | return boost::shared_ptr<time_zone_base_type>(); //null pointer |
---|
207 | } |
---|
208 | return record->second; |
---|
209 | } |
---|
210 | |
---|
211 | //! Returns a vector of strings holding the time zone regions in the database |
---|
212 | std::vector<std::string> region_list() const |
---|
213 | { |
---|
214 | typedef std::vector<std::string> vector_type; |
---|
215 | vector_type regions; |
---|
216 | typename map_type::const_iterator itr = m_zone_map.begin(); |
---|
217 | while(itr != m_zone_map.end()) { |
---|
218 | regions.push_back(itr->first); |
---|
219 | ++itr; |
---|
220 | } |
---|
221 | return regions; |
---|
222 | } |
---|
223 | |
---|
224 | private: |
---|
225 | typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; |
---|
226 | map_type m_zone_map; |
---|
227 | |
---|
228 | // start and end rule are of the same type |
---|
229 | typedef typename rule_type::start_rule::week_num week_num; |
---|
230 | |
---|
231 | /* TODO: mechanisms need to be put in place to handle different |
---|
232 | * types of rule specs. parse_rules() only handles nth_kday |
---|
233 | * rule types. */ |
---|
234 | |
---|
235 | //! parses rule specs for transition day rules |
---|
236 | rule_type* parse_rules(const string_type& sr, const string_type& er) const |
---|
237 | { |
---|
238 | using namespace gregorian; |
---|
239 | // start and end rule are of the same type, |
---|
240 | // both are included here for readability |
---|
241 | typedef typename rule_type::start_rule start_rule; |
---|
242 | typedef typename rule_type::end_rule end_rule; |
---|
243 | |
---|
244 | // these are: [start|end] nth, day, month |
---|
245 | int s_nth = 0, s_d = 0, s_m = 0; |
---|
246 | int e_nth = 0, e_d = 0, e_m = 0; |
---|
247 | split_rule_spec(s_nth, s_d, s_m, sr); |
---|
248 | split_rule_spec(e_nth, e_d, e_m, er); |
---|
249 | |
---|
250 | typename start_rule::week_num s_wn, e_wn; |
---|
251 | s_wn = get_week_num(s_nth); |
---|
252 | e_wn = get_week_num(e_nth); |
---|
253 | |
---|
254 | |
---|
255 | return new rule_type(start_rule(s_wn, s_d, s_m), |
---|
256 | end_rule(e_wn, e_d, e_m)); |
---|
257 | } |
---|
258 | //! helper function for parse_rules() |
---|
259 | week_num get_week_num(int nth) const |
---|
260 | { |
---|
261 | typedef typename rule_type::start_rule start_rule; |
---|
262 | switch(nth){ |
---|
263 | case 1: |
---|
264 | return start_rule::first; |
---|
265 | case 2: |
---|
266 | return start_rule::second; |
---|
267 | case 3: |
---|
268 | return start_rule::third; |
---|
269 | case 4: |
---|
270 | return start_rule::fourth; |
---|
271 | case 5: |
---|
272 | case -1: |
---|
273 | return start_rule::fifth; |
---|
274 | default: |
---|
275 | // shouldn't get here - add error handling later |
---|
276 | break; |
---|
277 | } |
---|
278 | return start_rule::fifth; // silence warnings |
---|
279 | } |
---|
280 | |
---|
281 | //! splits the [start|end]_date_rule string into 3 ints |
---|
282 | void split_rule_spec(int& nth, int& d, int& m, string_type rule) const |
---|
283 | { |
---|
284 | typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; |
---|
285 | typedef boost::tokenizer<char_separator_type, |
---|
286 | std::basic_string<char_type>::const_iterator, |
---|
287 | std::basic_string<char_type> > tokenizer; |
---|
288 | typedef boost::tokenizer<char_separator_type, |
---|
289 | std::basic_string<char_type>::const_iterator, |
---|
290 | std::basic_string<char_type> >::iterator tokenizer_iterator; |
---|
291 | |
---|
292 | const char_type sep_char[] = { ';', '\0'}; |
---|
293 | char_separator_type sep(sep_char); |
---|
294 | tokenizer tokens(rule, sep); // 3 fields |
---|
295 | |
---|
296 | tokenizer_iterator tok_iter = tokens.begin(); |
---|
297 | nth = std::atoi(tok_iter->c_str()); ++tok_iter; |
---|
298 | d = std::atoi(tok_iter->c_str()); ++tok_iter; |
---|
299 | m = std::atoi(tok_iter->c_str()); |
---|
300 | } |
---|
301 | |
---|
302 | |
---|
303 | //! Take a line from the csv, turn it into a time_zone_type. |
---|
304 | /*! Take a line from the csv, turn it into a time_zone_type, |
---|
305 | * and add it to the map. Zone_specs in csv file are expected to |
---|
306 | * have eleven fields that describe the time zone. Returns true if |
---|
307 | * zone_spec successfully added to database */ |
---|
308 | bool parse_string(string_type& s) |
---|
309 | { |
---|
310 | |
---|
311 | std::vector<string_type> result; |
---|
312 | typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; |
---|
313 | |
---|
314 | token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); |
---|
315 | |
---|
316 | token_iter_type end; |
---|
317 | while (i != end) { |
---|
318 | result.push_back(*i); |
---|
319 | i++; |
---|
320 | } |
---|
321 | |
---|
322 | enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, |
---|
323 | DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, |
---|
324 | END_TIME, FIELD_COUNT }; |
---|
325 | |
---|
326 | //take a shot at fixing gcc 4.x error |
---|
327 | const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); |
---|
328 | if (result.size() != expected_fields) { |
---|
329 | std::stringstream msg; |
---|
330 | msg << "Expecting " << FIELD_COUNT << " fields, got " |
---|
331 | << result.size() << " fields in line: " << s; |
---|
332 | throw bad_field_count(msg.str()); |
---|
333 | } |
---|
334 | |
---|
335 | // initializations |
---|
336 | bool has_dst = true; |
---|
337 | if(result[DSTABBR] == std::string()){ |
---|
338 | has_dst = false; |
---|
339 | } |
---|
340 | |
---|
341 | |
---|
342 | // start building components of a time_zone |
---|
343 | time_zone_names names(result[STDNAME], result[STDABBR], |
---|
344 | result[DSTNAME], result[DSTABBR]); |
---|
345 | |
---|
346 | time_duration_type utc_offset = |
---|
347 | str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); |
---|
348 | |
---|
349 | dst_adjustment_offsets adjust(time_duration_type(0,0,0), |
---|
350 | time_duration_type(0,0,0), |
---|
351 | time_duration_type(0,0,0)); |
---|
352 | |
---|
353 | boost::shared_ptr<rule_type> rules; |
---|
354 | |
---|
355 | if(has_dst){ |
---|
356 | adjust = dst_adjustment_offsets( |
---|
357 | str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), |
---|
358 | str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), |
---|
359 | str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) |
---|
360 | ); |
---|
361 | |
---|
362 | rules = |
---|
363 | boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], |
---|
364 | result[END_DATE_RULE])); |
---|
365 | } |
---|
366 | string_type id(result[ID]); |
---|
367 | boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); |
---|
368 | return (add_record(id, zone)); |
---|
369 | |
---|
370 | } |
---|
371 | |
---|
372 | }; |
---|
373 | |
---|
374 | } } // namespace |
---|
375 | |
---|
376 | #endif // DATE_TIME_TZ_DB_BASE_HPP__ |
---|