michael@0: michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2003-2010, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Author: Alan Liu michael@0: * Created: July 10 2003 michael@0: * Since: ICU 2.8 michael@0: ********************************************************************** michael@0: */ michael@0: #include "tzfile.h" // from Olson tzcode archive, copied to this dir michael@0: michael@0: #ifdef WIN32 michael@0: michael@0: #include michael@0: #undef min // windows.h/STL conflict michael@0: #undef max // windows.h/STL conflict michael@0: // "identifier was truncated to 'number' characters" warning michael@0: #pragma warning(disable: 4786) michael@0: michael@0: #else michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #endif michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "tz2icu.h" michael@0: #include "unicode/uversion.h" michael@0: michael@0: using namespace std; michael@0: michael@0: bool ICU44PLUS = TRUE; michael@0: string TZ_RESOURCE_NAME = ICU_TZ_RESOURCE; michael@0: michael@0: //-------------------------------------------------------------------- michael@0: // Time utilities michael@0: //-------------------------------------------------------------------- michael@0: michael@0: const int64_t SECS_PER_YEAR = 31536000; // 365 days michael@0: const int64_t SECS_PER_LEAP_YEAR = 31622400; // 366 days michael@0: const int64_t LOWEST_TIME32 = (int64_t)((int32_t)0x80000000); michael@0: const int64_t HIGHEST_TIME32 = (int64_t)((int32_t)0x7fffffff); michael@0: michael@0: bool isLeap(int32_t y) { michael@0: return (y%4 == 0) && ((y%100 != 0) || (y%400 == 0)); // Gregorian michael@0: } michael@0: michael@0: int64_t secsPerYear(int32_t y) { michael@0: return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR; michael@0: } michael@0: michael@0: /** michael@0: * Given a calendar year, return the GMT epoch seconds for midnight michael@0: * GMT of January 1 of that year. yearToSeconds(1970) == 0. michael@0: */ michael@0: int64_t yearToSeconds(int32_t year) { michael@0: // inefficient but foolproof michael@0: int64_t s = 0; michael@0: int32_t y = 1970; michael@0: while (y < year) { michael@0: s += secsPerYear(y++); michael@0: } michael@0: while (y > year) { michael@0: s -= secsPerYear(--y); michael@0: } michael@0: return s; michael@0: } michael@0: michael@0: /** michael@0: * Given 1970 GMT epoch seconds, return the calendar year containing michael@0: * that time. secondsToYear(0) == 1970. michael@0: */ michael@0: int32_t secondsToYear(int64_t seconds) { michael@0: // inefficient but foolproof michael@0: int32_t y = 1970; michael@0: int64_t s = 0; michael@0: if (seconds >= 0) { michael@0: for (;;) { michael@0: s += secsPerYear(y++); michael@0: if (s > seconds) break; michael@0: } michael@0: --y; michael@0: } else { michael@0: for (;;) { michael@0: s -= secsPerYear(--y); michael@0: if (s <= seconds) break; michael@0: } michael@0: } michael@0: return y; michael@0: } michael@0: michael@0: //-------------------------------------------------------------------- michael@0: // Types michael@0: //-------------------------------------------------------------------- michael@0: michael@0: struct FinalZone; michael@0: struct FinalRule; michael@0: struct SimplifiedZoneType; michael@0: michael@0: // A transition from one ZoneType to another michael@0: // Minimal size = 5 bytes (4+1) michael@0: struct Transition { michael@0: int64_t time; // seconds, 1970 epoch michael@0: int32_t type; // index into 'ZoneInfo.types' 0..255 michael@0: Transition(int64_t _time, int32_t _type) { michael@0: time = _time; michael@0: type = _type; michael@0: } michael@0: }; michael@0: michael@0: // A behavior mode (what zic calls a 'type') of a time zone. michael@0: // Minimal size = 6 bytes (4+1+3bits) michael@0: // SEE: SimplifiedZoneType michael@0: struct ZoneType { michael@0: int64_t rawoffset; // raw seconds offset from GMT michael@0: int64_t dstoffset; // dst seconds offset from GMT michael@0: michael@0: // We don't really need any of the following, but they are michael@0: // retained for possible future use. See SimplifiedZoneType. michael@0: int32_t abbr; // index into ZoneInfo.abbrs 0..n-1 michael@0: bool isdst; michael@0: bool isstd; michael@0: bool isgmt; michael@0: michael@0: ZoneType(const SimplifiedZoneType&); // used by optimizeTypeList michael@0: michael@0: ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {} michael@0: michael@0: // A restricted equality, of just the raw and dst offset michael@0: bool matches(const ZoneType& other) { michael@0: return rawoffset == other.rawoffset && michael@0: dstoffset == other.dstoffset; michael@0: } michael@0: }; michael@0: michael@0: // A collection of transitions from one ZoneType to another, together michael@0: // with a list of the ZoneTypes. A ZoneInfo object may have a long michael@0: // list of transitions between a smaller list of ZoneTypes. michael@0: // michael@0: // This object represents the contents of a single zic-created michael@0: // zoneinfo file. michael@0: struct ZoneInfo { michael@0: vector transitions; michael@0: vector types; michael@0: vector abbrs; michael@0: michael@0: string finalRuleID; michael@0: int32_t finalOffset; michael@0: int32_t finalYear; // -1 if none michael@0: michael@0: // If this is an alias, then all other fields are meaningless, and michael@0: // this field will point to the "real" zone 0..n-1. michael@0: int32_t aliasTo; // -1 if this is a "real" zone michael@0: michael@0: // If there are aliases TO this zone, then the following set will michael@0: // contain their index numbers (each index >= 0). michael@0: set aliases; michael@0: michael@0: ZoneInfo() : finalYear(-1), aliasTo(-1) {} michael@0: michael@0: void mergeFinalData(const FinalZone& fz); michael@0: michael@0: void optimizeTypeList(); michael@0: michael@0: // Set this zone to be an alias TO another zone. michael@0: void setAliasTo(int32_t index); michael@0: michael@0: // Clear the list of aliases OF this zone. michael@0: void clearAliases(); michael@0: michael@0: // Add an alias to the list of aliases OF this zone. michael@0: void addAlias(int32_t index); michael@0: michael@0: // Is this an alias to another zone? michael@0: bool isAlias() const { michael@0: return aliasTo >= 0; michael@0: } michael@0: michael@0: // Retrieve alias list michael@0: const set& getAliases() const { michael@0: return aliases; michael@0: } michael@0: michael@0: void print(ostream& os, const string& id) const; michael@0: }; michael@0: michael@0: void ZoneInfo::clearAliases() { michael@0: assert(aliasTo < 0); michael@0: aliases.clear(); michael@0: } michael@0: michael@0: void ZoneInfo::addAlias(int32_t index) { michael@0: assert(aliasTo < 0 && index >= 0 && aliases.find(index) == aliases.end()); michael@0: aliases.insert(index); michael@0: } michael@0: michael@0: void ZoneInfo::setAliasTo(int32_t index) { michael@0: assert(index >= 0); michael@0: assert(aliases.size() == 0); michael@0: aliasTo = index; michael@0: } michael@0: michael@0: typedef map ZoneMap; michael@0: michael@0: typedef ZoneMap::const_iterator ZoneMapIter; michael@0: michael@0: //-------------------------------------------------------------------- michael@0: // ZONEINFO michael@0: //-------------------------------------------------------------------- michael@0: michael@0: // Global map holding all our ZoneInfo objects, indexed by id. michael@0: ZoneMap ZONEINFO; michael@0: michael@0: //-------------------------------------------------------------------- michael@0: // zoneinfo file parsing michael@0: //-------------------------------------------------------------------- michael@0: michael@0: // Read zic-coded 32-bit integer from file michael@0: int64_t readcoded(ifstream& file, int64_t minv=numeric_limits::min(), michael@0: int64_t maxv=numeric_limits::max()) { michael@0: unsigned char buf[4]; // must be UNSIGNED michael@0: int64_t val=0; michael@0: file.read((char*)buf, 4); michael@0: for(int32_t i=0,shift=24;i<4;++i,shift-=8) { michael@0: val |= buf[i] << shift; michael@0: } michael@0: if (val < minv || val > maxv) { michael@0: ostringstream os; michael@0: os << "coded value out-of-range: " << val << ", expected [" michael@0: << minv << ", " << maxv << "]"; michael@0: throw out_of_range(os.str()); michael@0: } michael@0: return val; michael@0: } michael@0: michael@0: // Read zic-coded 64-bit integer from file michael@0: int64_t readcoded64(ifstream& file, int64_t minv=numeric_limits::min(), michael@0: int64_t maxv=numeric_limits::max()) { michael@0: unsigned char buf[8]; // must be UNSIGNED michael@0: int64_t val=0; michael@0: file.read((char*)buf, 8); michael@0: for(int32_t i=0,shift=56;i<8;++i,shift-=8) { michael@0: val |= (int64_t)buf[i] << shift; michael@0: } michael@0: if (val < minv || val > maxv) { michael@0: ostringstream os; michael@0: os << "coded value out-of-range: " << val << ", expected [" michael@0: << minv << ", " << maxv << "]"; michael@0: throw out_of_range(os.str()); michael@0: } michael@0: return val; michael@0: } michael@0: michael@0: // Read a boolean value michael@0: bool readbool(ifstream& file) { michael@0: char c; michael@0: file.read(&c, 1); michael@0: if (c!=0 && c!=1) { michael@0: ostringstream os; michael@0: os << "boolean value out-of-range: " << (int32_t)c; michael@0: throw out_of_range(os.str()); michael@0: } michael@0: return (c!=0); michael@0: } michael@0: michael@0: /** michael@0: * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo michael@0: * @param file an already-open file stream michael@0: */ michael@0: void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData) { michael@0: int32_t i; michael@0: michael@0: // Check for TZ_ICU_MAGIC signature at file start. If we get a michael@0: // signature mismatch, it means we're trying to read a file which michael@0: // isn't a ICU-modified-zic-created zoneinfo file. Typically this michael@0: // means the user is passing in a "normal" zoneinfo directory, or michael@0: // a zoneinfo directory that is polluted with other files, or that michael@0: // the user passed in the wrong directory. michael@0: char buf[32]; michael@0: file.read(buf, 4); michael@0: if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) { michael@0: throw invalid_argument("TZ_ICU_MAGIC signature missing"); michael@0: } michael@0: // skip additional Olson byte version michael@0: file.read(buf, 1); michael@0: // if '\0', we have just one copy of data, if '2', there is additional michael@0: // 64 bit version at the end. michael@0: if(buf[0]!=0 && buf[0]!='2') { michael@0: throw invalid_argument("Bad Olson version info"); michael@0: } michael@0: michael@0: // Read reserved bytes. The first of these will be a version byte. michael@0: file.read(buf, 15); michael@0: if (*(ICUZoneinfoVersion*)&buf != TZ_ICU_VERSION) { michael@0: throw invalid_argument("File version mismatch"); michael@0: } michael@0: michael@0: // Read array sizes michael@0: int64_t isgmtcnt = readcoded(file, 0); michael@0: int64_t isdstcnt = readcoded(file, 0); michael@0: int64_t leapcnt = readcoded(file, 0); michael@0: int64_t timecnt = readcoded(file, 0); michael@0: int64_t typecnt = readcoded(file, 0); michael@0: int64_t charcnt = readcoded(file, 0); michael@0: michael@0: // Confirm sizes that we assume to be equal. These assumptions michael@0: // are drawn from a reading of the zic source (2003a), so they michael@0: // should hold unless the zic source changes. michael@0: if (isgmtcnt != typecnt || isdstcnt != typecnt) { michael@0: throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt"); michael@0: } michael@0: michael@0: // Used temporarily to store transition times and types. We need michael@0: // to do this because the times and types are stored in two michael@0: // separate arrays. michael@0: vector transitionTimes(timecnt, -1); // temporary michael@0: vector transitionTypes(timecnt, -1); // temporary michael@0: michael@0: // Read transition times michael@0: for (i=0; i= typecnt) { michael@0: ostringstream os; michael@0: os << "illegal type: " << t << ", expected [0, " << (typecnt-1) << "]"; michael@0: throw out_of_range(os.str()); michael@0: } michael@0: transitionTypes[i] = t; michael@0: } michael@0: michael@0: // Build transitions vector out of corresponding times and types. michael@0: bool insertInitial = false; michael@0: if (is64bitData && !ICU44PLUS) { michael@0: if (timecnt > 0) { michael@0: int32_t minidx = -1; michael@0: for (i=0; i transitionTimes[minidx]) { michael@0: // Preserve the latest transition before the 32bit minimum time michael@0: minidx = i; michael@0: } michael@0: } else if (transitionTimes[i] > HIGHEST_TIME32) { michael@0: // Skipping the rest of the transition data. We cannot put such michael@0: // transitions into zoneinfo.res, because data is limited to singed michael@0: // 32bit int by the ICU resource bundle. michael@0: break; michael@0: } else { michael@0: info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i])); michael@0: } michael@0: } michael@0: michael@0: if (minidx != -1) { michael@0: // If there are any transitions before the 32bit minimum time, michael@0: // put the type information with the 32bit minimum time michael@0: vector::iterator itr = info.transitions.begin(); michael@0: info.transitions.insert(itr, Transition(LOWEST_TIME32, transitionTypes[minidx])); michael@0: } else { michael@0: // Otherwise, we need insert the initial type later michael@0: insertInitial = true; michael@0: } michael@0: } michael@0: } else { michael@0: for (i=0; i 0); michael@0: assert(typecnt > 0); michael@0: michael@0: int32_t initialTypeIdx = -1; michael@0: michael@0: // Check if the first type is not dst michael@0: if (info.types.at(0).dstoffset != 0) { michael@0: // Initial type's rawoffset is same with the rawoffset after the michael@0: // first transition, but no DST is observed. michael@0: int64_t rawoffset0 = (info.types.at(info.transitions.at(0).type)).rawoffset; michael@0: // Look for matching type michael@0: for (i=0; i<(int32_t)info.types.size(); ++i) { michael@0: if (info.types.at(i).rawoffset == rawoffset0 michael@0: && info.types.at(i).dstoffset == 0) { michael@0: initialTypeIdx = i; michael@0: break; michael@0: } michael@0: } michael@0: } else { michael@0: initialTypeIdx = 0; michael@0: } michael@0: assert(initialTypeIdx >= 0); michael@0: // Add the initial type associated with the lowest int32 time michael@0: vector::iterator itr = info.transitions.begin(); michael@0: info.transitions.insert(itr, Transition(LOWEST_TIME32, initialTypeIdx)); michael@0: } michael@0: michael@0: michael@0: // Read the abbreviation string michael@0: if (charcnt) { michael@0: // All abbreviations are concatenated together, with a 0 at michael@0: // the end of each abbr. michael@0: char* str = new char[charcnt + 8]; michael@0: file.read(str, charcnt); michael@0: michael@0: // Split abbreviations apart into individual strings. Record michael@0: // offset of each abbr in a vector. michael@0: vector abbroffset; michael@0: char *limit=str+charcnt; michael@0: for (char* p=str; p 0,1,2,3. michael@0: michael@0: // Keep track of which abbreviations get used. michael@0: vector abbrseen(abbroffset.size(), false); michael@0: michael@0: for (vector::iterator it=info.types.begin(); michael@0: it!=info.types.end(); michael@0: ++it) { michael@0: vector::const_iterator x= michael@0: find(abbroffset.begin(), abbroffset.end(), it->abbr); michael@0: if (x==abbroffset.end()) { michael@0: // TODO: Modify code to add a new string to the end of michael@0: // the abbr list when a middle offset is given, e.g., michael@0: // "abc*def*" where * == '\0', take offset of 1 and michael@0: // make the array "abc", "def", "bc", and translate 1 michael@0: // => 2. NOT CRITICAL since we don't even use the michael@0: // abbr at this time. michael@0: #if 0 michael@0: // TODO: Re-enable this warning if we start using michael@0: // the Olson abbr data, or if the above TODO is completed. michael@0: ostringstream os; michael@0: os << "Warning: unusual abbr offset " << it->abbr michael@0: << ", expected one of"; michael@0: for (vector::const_iterator y=abbroffset.begin(); michael@0: y!=abbroffset.end(); ++y) { michael@0: os << ' ' << *y; michael@0: } michael@0: cerr << os.str() << "; using 0" << endl; michael@0: #endif michael@0: it->abbr = 0; michael@0: } else { michael@0: int32_t index = x - abbroffset.begin(); michael@0: it->abbr = index; michael@0: abbrseen[index] = true; michael@0: } michael@0: } michael@0: michael@0: for (int32_t ii=0;ii<(int32_t) abbrseen.size();++ii) { michael@0: if (!abbrseen[ii]) { michael@0: cerr << "Warning: unused abbreviation: " << ii << endl; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // Read leap second info, if any. michael@0: // *** We discard leap second data. *** michael@0: for (i=0; i + '\n' after the 64bit version data michael@0: char ch = file.get(); michael@0: if (ch == 0x0a) { michael@0: bool invalidchar = false; michael@0: while (file.get(ch)) { michael@0: if (ch == 0x0a) { michael@0: break; michael@0: } michael@0: if (ch < 0x20) { michael@0: // must be printable ascii michael@0: invalidchar = true; michael@0: break; michael@0: } michael@0: } michael@0: if (!invalidchar) { michael@0: eofPos = (int64_t) file.tellg(); michael@0: file.seekg(0, ios::end); michael@0: eofPos = eofPos - (int64_t) file.tellg(); michael@0: if (eofPos == 0) { michael@0: alldone = true; michael@0: } michael@0: } michael@0: } michael@0: if (!alldone) { michael@0: ostringstream os; michael@0: os << (-eofPos) << " unprocessed bytes at end"; michael@0: throw invalid_argument(os.str()); michael@0: } michael@0: michael@0: ZONEINFO[id] = info64; michael@0: } michael@0: michael@0: /** michael@0: * Recursively scan the given directory, calling handleFile() for each michael@0: * file in the tree. The user should call with the root directory and michael@0: * a prefix of "". The function will call itself with non-empty michael@0: * prefix values. michael@0: */ michael@0: #ifdef WIN32 michael@0: michael@0: void scandir(string dirname, string prefix="") { michael@0: HANDLE hList; michael@0: WIN32_FIND_DATA FileData; michael@0: michael@0: // Get the first file michael@0: hList = FindFirstFile((dirname + "\\*").c_str(), &FileData); michael@0: if (hList == INVALID_HANDLE_VALUE) { michael@0: cerr << "Error: Invalid directory: " << dirname << endl; michael@0: exit(1); michael@0: } michael@0: for (;;) { michael@0: string name(FileData.cFileName); michael@0: string path(dirname + "\\" + name); michael@0: if (FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { michael@0: if (name != "." && name != "..") { michael@0: scandir(path, prefix + name + "/"); michael@0: } michael@0: } else { michael@0: try { michael@0: string id = prefix + name; michael@0: handleFile(path, id); michael@0: } catch (const exception& e) { michael@0: cerr << "Error: While processing \"" << path << "\", " michael@0: << e.what() << endl; michael@0: exit(1); michael@0: } michael@0: } michael@0: michael@0: if (!FindNextFile(hList, &FileData)) { michael@0: if (GetLastError() == ERROR_NO_MORE_FILES) { michael@0: break; michael@0: } // else...? michael@0: } michael@0: } michael@0: FindClose(hList); michael@0: } michael@0: michael@0: #else michael@0: michael@0: void scandir(string dir, string prefix="") { michael@0: DIR *dp; michael@0: struct dirent *dir_entry; michael@0: struct stat stat_info; michael@0: char pwd[512]; michael@0: vector subdirs; michael@0: vector subfiles; michael@0: michael@0: if ((dp = opendir(dir.c_str())) == NULL) { michael@0: cerr << "Error: Invalid directory: " << dir << endl; michael@0: exit(1); michael@0: } michael@0: if (!getcwd(pwd, sizeof(pwd))) { michael@0: cerr << "Error: Directory name too long" << endl; michael@0: exit(1); michael@0: } michael@0: chdir(dir.c_str()); michael@0: while ((dir_entry = readdir(dp)) != NULL) { michael@0: string name = dir_entry->d_name; michael@0: string path = dir + "/" + name; michael@0: lstat(dir_entry->d_name,&stat_info); michael@0: if (S_ISDIR(stat_info.st_mode)) { michael@0: if (name != "." && name != "..") { michael@0: subdirs.push_back(path); michael@0: subdirs.push_back(prefix + name + "/"); michael@0: // scandir(path, prefix + name + "/"); michael@0: } michael@0: } else { michael@0: try { michael@0: string id = prefix + name; michael@0: subfiles.push_back(path); michael@0: subfiles.push_back(id); michael@0: // handleFile(path, id); michael@0: } catch (const exception& e) { michael@0: cerr << "Error: While processing \"" << path << "\", " michael@0: << e.what() << endl; michael@0: exit(1); michael@0: } michael@0: } michael@0: } michael@0: closedir(dp); michael@0: chdir(pwd); michael@0: michael@0: for(int32_t i=0;i<(int32_t)subfiles.size();i+=2) { michael@0: try { michael@0: handleFile(subfiles[i], subfiles[i+1]); michael@0: } catch (const exception& e) { michael@0: cerr << "Error: While processing \"" << subfiles[i] << "\", " michael@0: << e.what() << endl; michael@0: exit(1); michael@0: } michael@0: } michael@0: for(int32_t i=0;i<(int32_t)subdirs.size();i+=2) { michael@0: scandir(subdirs[i], subdirs[i+1]); michael@0: } michael@0: } michael@0: michael@0: #endif michael@0: michael@0: //-------------------------------------------------------------------- michael@0: // Final zone and rule info michael@0: //-------------------------------------------------------------------- michael@0: michael@0: /** michael@0: * Read and discard the current line. michael@0: */ michael@0: void consumeLine(istream& in) { michael@0: int32_t c; michael@0: do { michael@0: c = in.get(); michael@0: } while (c != EOF && c != '\n'); michael@0: } michael@0: michael@0: enum { michael@0: DOM = 0, michael@0: DOWGEQ = 1, michael@0: DOWLEQ = 2 michael@0: }; michael@0: michael@0: const char* TIME_MODE[] = {"w", "s", "u"}; michael@0: michael@0: // Allow 29 days in February because zic outputs February 29 michael@0: // for rules like "last Sunday in February". michael@0: const int32_t MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31}; michael@0: michael@0: const int32_t HOUR = 3600; michael@0: michael@0: struct FinalZone { michael@0: int32_t offset; // raw offset michael@0: int32_t year; // takes effect for y >= year michael@0: string ruleid; michael@0: set aliases; michael@0: FinalZone(int32_t _offset, int32_t _year, const string& _ruleid) : michael@0: offset(_offset), year(_year), ruleid(_ruleid) { michael@0: if (offset <= -16*HOUR || offset >= 16*HOUR) { michael@0: ostringstream os; michael@0: os << "Invalid input offset " << offset michael@0: << " for year " << year michael@0: << " and rule ID " << ruleid; michael@0: throw invalid_argument(os.str()); michael@0: } michael@0: if (year < 1900 || year >= 2050) { michael@0: ostringstream os; michael@0: os << "Invalid input year " << year michael@0: << " with offset " << offset michael@0: << " and rule ID " << ruleid; michael@0: throw invalid_argument(os.str()); michael@0: } michael@0: } michael@0: FinalZone() : offset(-1), year(-1) {} michael@0: void addLink(const string& alias) { michael@0: if (aliases.find(alias) != aliases.end()) { michael@0: ostringstream os; michael@0: os << "Duplicate alias " << alias; michael@0: throw invalid_argument(os.str()); michael@0: } michael@0: aliases.insert(alias); michael@0: } michael@0: }; michael@0: michael@0: struct FinalRulePart { michael@0: int32_t mode; michael@0: int32_t month; michael@0: int32_t dom; michael@0: int32_t dow; michael@0: int32_t time; michael@0: int32_t offset; // dst offset, usually either 0 or 1:00 michael@0: michael@0: // Isstd and isgmt only have 3 valid states, corresponding to local michael@0: // wall time, local standard time, and GMT standard time. michael@0: // Here is how the isstd & isgmt flags are set by zic: michael@0: //| case 's': /* Standard */ michael@0: //| rp->r_todisstd = TRUE; michael@0: //| rp->r_todisgmt = FALSE; michael@0: //| case 'w': /* Wall */ michael@0: //| rp->r_todisstd = FALSE; michael@0: //| rp->r_todisgmt = FALSE; michael@0: //| case 'g': /* Greenwich */ michael@0: //| case 'u': /* Universal */ michael@0: //| case 'z': /* Zulu */ michael@0: //| rp->r_todisstd = TRUE; michael@0: //| rp->r_todisgmt = TRUE; michael@0: bool isstd; michael@0: bool isgmt; michael@0: michael@0: bool isset; // used during building; later ignored michael@0: michael@0: FinalRulePart() : isset(false) {} michael@0: void set(const string& id, michael@0: const string& _mode, michael@0: int32_t _month, michael@0: int32_t _dom, michael@0: int32_t _dow, michael@0: int32_t _time, michael@0: bool _isstd, michael@0: bool _isgmt, michael@0: int32_t _offset) { michael@0: if (isset) { michael@0: throw invalid_argument("FinalRulePart set twice"); michael@0: } michael@0: isset = true; michael@0: if (_mode == "DOWLEQ") { michael@0: mode = DOWLEQ; michael@0: } else if (_mode == "DOWGEQ") { michael@0: mode = DOWGEQ; michael@0: } else if (_mode == "DOM") { michael@0: mode = DOM; michael@0: } else { michael@0: throw invalid_argument("Unrecognized FinalRulePart mode"); michael@0: } michael@0: month = _month; michael@0: dom = _dom; michael@0: dow = _dow; michael@0: time = _time; michael@0: isstd = _isstd; michael@0: isgmt = _isgmt; michael@0: offset = _offset; michael@0: michael@0: ostringstream os; michael@0: if (month < 0 || month >= 12) { michael@0: os << "Invalid input month " << month; michael@0: } michael@0: if (dom < 1 || dom > MONTH_LEN[month]) { michael@0: os << "Invalid input day of month " << dom; michael@0: } michael@0: if (mode != DOM && (dow < 0 || dow >= 7)) { michael@0: os << "Invalid input day of week " << dow; michael@0: } michael@0: if (offset < 0 || offset > HOUR) { michael@0: os << "Invalid input offset " << offset; michael@0: } michael@0: if (isgmt && !isstd) { michael@0: os << "Invalid input isgmt && !isstd"; michael@0: } michael@0: if (!os.str().empty()) { michael@0: os << " for rule " michael@0: << id michael@0: << _mode michael@0: << month << dom << dow << time michael@0: << isstd << isgmt michael@0: << offset; michael@0: throw invalid_argument(os.str()); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Return the time mode as an ICU SimpleTimeZone int from 0..2; michael@0: * see simpletz.h. michael@0: */ michael@0: int32_t timemode() const { michael@0: if (isgmt) { michael@0: assert(isstd); michael@0: return 2; // gmt standard michael@0: } michael@0: if (isstd) { michael@0: return 1; // local standard michael@0: } michael@0: return 0; // local wall michael@0: } michael@0: michael@0: // The SimpleTimeZone encoding method for rules is as follows: michael@0: // stz_dowim stz_dow michael@0: // DOM: dom 0 michael@0: // DOWGEQ: dom -(dow+1) michael@0: // DOWLEQ: -dom -(dow+1) michael@0: // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2 michael@0: // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2 michael@0: // to encode 7, use stz_dowim=7, stz_dow=0 michael@0: // Note that for this program and for SimpleTimeZone, 0==Jan, michael@0: // but for this program 0==Sun while for SimpleTimeZone 1==Sun. michael@0: michael@0: /** michael@0: * Return a "dowim" param suitable for SimpleTimeZone. michael@0: */ michael@0: int32_t stz_dowim() const { michael@0: return (mode == DOWLEQ) ? -dom : dom; michael@0: } michael@0: michael@0: /** michael@0: * Return a "dow" param suitable for SimpleTimeZone. michael@0: */ michael@0: int32_t stz_dow() const { michael@0: return (mode == DOM) ? 0 : -(dow+1); michael@0: } michael@0: }; michael@0: michael@0: struct FinalRule { michael@0: FinalRulePart part[2]; michael@0: michael@0: bool isset() const { michael@0: return part[0].isset && part[1].isset; michael@0: } michael@0: michael@0: void print(ostream& os) const; michael@0: }; michael@0: michael@0: map finalZones; michael@0: map finalRules; michael@0: michael@0: map > links; michael@0: map reverseLinks; michael@0: map linkSource; // id => "Olson link" or "ICU alias" michael@0: michael@0: /** michael@0: * Predicate used to find FinalRule objects that do not have both michael@0: * sub-parts set (indicating an error in the input file). michael@0: */ michael@0: bool isNotSet(const pair& p) { michael@0: return !p.second.isset(); michael@0: } michael@0: michael@0: /** michael@0: * Predicate used to find FinalZone objects that do not map to a known michael@0: * rule (indicating an error in the input file). michael@0: */ michael@0: bool mapsToUnknownRule(const pair& p) { michael@0: return finalRules.find(p.second.ruleid) == finalRules.end(); michael@0: } michael@0: michael@0: /** michael@0: * This set is used to make sure each rule in finalRules is used at michael@0: * least once. First we populate it with all the rules from michael@0: * finalRules; then we remove all the rules referred to in michael@0: * finaleZones. michael@0: */ michael@0: set ruleIDset; michael@0: michael@0: void insertRuleID(const pair& p) { michael@0: ruleIDset.insert(p.first); michael@0: } michael@0: michael@0: void eraseRuleID(const pair& p) { michael@0: ruleIDset.erase(p.second.ruleid); michael@0: } michael@0: michael@0: /** michael@0: * Populate finalZones and finalRules from the given istream. michael@0: */ michael@0: void readFinalZonesAndRules(istream& in) { michael@0: michael@0: for (;;) { michael@0: string token; michael@0: in >> token; michael@0: if (in.eof() || !in) { michael@0: break; michael@0: } else if (token == "zone") { michael@0: // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0) michael@0: string id, ruleid; michael@0: int32_t offset, year; michael@0: in >> id >> offset >> year >> ruleid; michael@0: consumeLine(in); michael@0: finalZones[id] = FinalZone(offset, year, ruleid); michael@0: } else if (token == "rule") { michael@0: // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600 michael@0: // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0 michael@0: string id, mode; michael@0: int32_t month, dom, dow, time, offset; michael@0: bool isstd, isgmt; michael@0: in >> id >> mode >> month >> dom >> dow >> time >> isstd >> isgmt >> offset; michael@0: consumeLine(in); michael@0: FinalRule& fr = finalRules[id]; michael@0: int32_t p = fr.part[0].isset ? 1 : 0; michael@0: fr.part[p].set(id, mode, month, dom, dow, time, isstd, isgmt, offset); michael@0: } else if (token == "link") { michael@0: string fromid, toid; // fromid == "real" zone, toid == alias michael@0: in >> fromid >> toid; michael@0: // DO NOT consumeLine(in); michael@0: if (finalZones.find(toid) != finalZones.end()) { michael@0: throw invalid_argument("Bad link: `to' id is a \"real\" zone"); michael@0: } michael@0: michael@0: links[fromid].insert(toid); michael@0: reverseLinks[toid] = fromid; michael@0: michael@0: linkSource[fromid] = "Olson link"; michael@0: linkSource[toid] = "Olson link"; michael@0: } else if (token.length() > 0 && token[0] == '#') { michael@0: consumeLine(in); michael@0: } else { michael@0: throw invalid_argument("Unrecognized keyword"); michael@0: } michael@0: } michael@0: michael@0: if (!in.eof() && !in) { michael@0: throw invalid_argument("Parse failure"); michael@0: } michael@0: michael@0: // Perform validity check: Each rule should have data for 2 parts. michael@0: if (count_if(finalRules.begin(), finalRules.end(), isNotSet) != 0) { michael@0: throw invalid_argument("One or more incomplete rule pairs"); michael@0: } michael@0: michael@0: // Perform validity check: Each zone should map to a known rule. michael@0: if (count_if(finalZones.begin(), finalZones.end(), mapsToUnknownRule) != 0) { michael@0: throw invalid_argument("One or more zones refers to an unknown rule"); michael@0: } michael@0: michael@0: // Perform validity check: Each rule should be referred to by a zone. michael@0: ruleIDset.clear(); michael@0: for_each(finalRules.begin(), finalRules.end(), insertRuleID); michael@0: for_each(finalZones.begin(), finalZones.end(), eraseRuleID); michael@0: if (ruleIDset.size() != 0) { michael@0: throw invalid_argument("Unused rules"); michael@0: } michael@0: } michael@0: michael@0: //-------------------------------------------------------------------- michael@0: // Resource bundle output michael@0: //-------------------------------------------------------------------- michael@0: michael@0: // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT michael@0: michael@0: void ZoneInfo::print(ostream& os, const string& id) const { michael@0: // Implement compressed format #2: michael@0: os << " /* " << id << " */ "; michael@0: michael@0: if (aliasTo >= 0) { michael@0: assert(aliases.size() == 0); michael@0: os << ":int { " << aliasTo << " } "; // No endl - save room for comment. michael@0: return; michael@0: } michael@0: michael@0: if (ICU44PLUS) { michael@0: os << ":table {" << endl; michael@0: } else { michael@0: os << ":array {" << endl; michael@0: } michael@0: michael@0: vector::const_iterator trn; michael@0: vector::const_iterator typ; michael@0: michael@0: bool first; michael@0: michael@0: if (ICU44PLUS) { michael@0: trn = transitions.begin(); michael@0: michael@0: // pre 32bit transitions michael@0: if (trn != transitions.end() && trn->time < LOWEST_TIME32) { michael@0: os << " transPre32:intvector { "; michael@0: for (first = true; trn != transitions.end() && trn->time < LOWEST_TIME32; ++trn) { michael@0: if (!first) { michael@0: os<< ", "; michael@0: } michael@0: first = false; michael@0: os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff); michael@0: } michael@0: os << " }" << endl; michael@0: } michael@0: michael@0: // 32bit transtions michael@0: if (trn != transitions.end() && trn->time < HIGHEST_TIME32) { michael@0: os << " trans:intvector { "; michael@0: for (first = true; trn != transitions.end() && trn->time < HIGHEST_TIME32; ++trn) { michael@0: if (!first) { michael@0: os << ", "; michael@0: } michael@0: first = false; michael@0: os << trn->time; michael@0: } michael@0: os << " }" << endl; michael@0: } michael@0: michael@0: // post 32bit transitons michael@0: if (trn != transitions.end()) { michael@0: os << " transPost32:intvector { "; michael@0: for (first = true; trn != transitions.end(); ++trn) { michael@0: if (!first) { michael@0: os<< ", "; michael@0: } michael@0: first = false; michael@0: os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff); michael@0: } michael@0: os << " }" << endl; michael@0: } michael@0: } else { michael@0: os << " :intvector { "; michael@0: for (trn = transitions.begin(), first = true; trn != transitions.end(); ++trn) { michael@0: if (!first) os << ", "; michael@0: first = false; michael@0: os << trn->time; michael@0: } michael@0: os << " }" << endl; michael@0: } michael@0: michael@0: michael@0: first=true; michael@0: if (ICU44PLUS) { michael@0: os << " typeOffsets:intvector { "; michael@0: } else { michael@0: os << " :intvector { "; michael@0: } michael@0: for (typ = types.begin(); typ != types.end(); ++typ) { michael@0: if (!first) os << ", "; michael@0: first = false; michael@0: os << typ->rawoffset << ", " << typ->dstoffset; michael@0: } michael@0: os << " }" << endl; michael@0: michael@0: if (ICU44PLUS) { michael@0: if (transitions.size() != 0) { michael@0: os << " typeMap:bin { \"" << hex << setfill('0'); michael@0: for (trn = transitions.begin(); trn != transitions.end(); ++trn) { michael@0: os << setw(2) << trn->type; michael@0: } michael@0: os << dec << "\" }" << endl; michael@0: } michael@0: } else { michael@0: os << " :bin { \"" << hex << setfill('0'); michael@0: for (trn = transitions.begin(); trn != transitions.end(); ++trn) { michael@0: os << setw(2) << trn->type; michael@0: } michael@0: os << dec << "\" }" << endl; michael@0: } michael@0: michael@0: // Final zone info, if any michael@0: if (finalYear != -1) { michael@0: if (ICU44PLUS) { michael@0: os << " finalRule { \"" << finalRuleID << "\" }" << endl; michael@0: os << " finalRaw:int { " << finalOffset << " }" << endl; michael@0: os << " finalYear:int { " << finalYear << " }" << endl; michael@0: } else { michael@0: os << " \"" << finalRuleID << "\"" << endl; michael@0: os << " :intvector { " << finalOffset << ", " michael@0: << finalYear << " }" << endl; michael@0: } michael@0: } michael@0: michael@0: // Alias list, if any michael@0: if (aliases.size() != 0) { michael@0: first = true; michael@0: if (ICU44PLUS) { michael@0: os << " links:intvector { "; michael@0: } else { michael@0: os << " :intvector { "; michael@0: } michael@0: for (set::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) { michael@0: if (!first) os << ", "; michael@0: first = false; michael@0: os << *i; michael@0: } michael@0: os << " }" << endl; michael@0: } michael@0: michael@0: os << " } "; // no trailing 'endl', so comments can be placed. michael@0: } michael@0: michael@0: inline ostream& michael@0: operator<<(ostream& os, const ZoneMap& zoneinfo) { michael@0: int32_t c = 0; michael@0: for (ZoneMapIter it = zoneinfo.begin(); michael@0: it != zoneinfo.end(); michael@0: ++it) { michael@0: if(c && !ICU44PLUS) os << ","; michael@0: it->second.print(os, it->first); michael@0: os << "//Z#" << c++ << endl; michael@0: } michael@0: return os; michael@0: } michael@0: michael@0: // print the string list michael@0: ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) { michael@0: int32_t n = 0; // count michael@0: int32_t col = 0; // column michael@0: os << " Names {" << endl michael@0: << " "; michael@0: for (ZoneMapIter it = zoneinfo.begin(); michael@0: it != zoneinfo.end(); michael@0: ++it) { michael@0: if(n) { michael@0: os << ","; michael@0: col ++; michael@0: } michael@0: const string& id = it->first; michael@0: os << "\"" << id << "\""; michael@0: col += id.length() + 2; michael@0: if(col >= 50) { michael@0: os << " // " << n << endl michael@0: << " "; michael@0: col = 0; michael@0: } michael@0: n++; michael@0: } michael@0: os << " // " << (n-1) << endl michael@0: << " }" << endl; michael@0: michael@0: return os; michael@0: } michael@0: michael@0: //-------------------------------------------------------------------- michael@0: // main michael@0: //-------------------------------------------------------------------- michael@0: michael@0: // Unary predicate for finding transitions after a given time michael@0: bool isAfter(const Transition t, int64_t thresh) { michael@0: return t.time >= thresh; michael@0: } michael@0: michael@0: /** michael@0: * A zone type that contains only the raw and dst offset. Used by the michael@0: * optimizeTypeList() method. michael@0: */ michael@0: struct SimplifiedZoneType { michael@0: int64_t rawoffset; michael@0: int64_t dstoffset; michael@0: SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {} michael@0: SimplifiedZoneType(const ZoneType& t) : rawoffset(t.rawoffset), michael@0: dstoffset(t.dstoffset) {} michael@0: bool operator<(const SimplifiedZoneType& t) const { michael@0: return rawoffset < t.rawoffset || michael@0: (rawoffset == t.rawoffset && michael@0: dstoffset < t.dstoffset); michael@0: } michael@0: }; michael@0: michael@0: /** michael@0: * Construct a ZoneType from a SimplifiedZoneType. Note that this michael@0: * discards information; the new ZoneType will have meaningless michael@0: * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate, michael@0: * since ignoring these is how we do optimization (we have no use for michael@0: * these in historical transitions). michael@0: */ michael@0: ZoneType::ZoneType(const SimplifiedZoneType& t) : michael@0: rawoffset(t.rawoffset), dstoffset(t.dstoffset), michael@0: abbr(-1), isdst(false), isstd(false), isgmt(false) {} michael@0: michael@0: /** michael@0: * Optimize the type list to remove excess entries. The type list may michael@0: * contain entries that are distinct only in terms of their dst, std, michael@0: * or gmt flags. Since we don't care about those flags, we can reduce michael@0: * the type list to a set of unique raw/dst offset pairs, and remap michael@0: * the type indices in the transition list, which stores, for each michael@0: * transition, a transition time and a type index. michael@0: */ michael@0: void ZoneInfo::optimizeTypeList() { michael@0: // Assemble set of unique types; only those in the `transitions' michael@0: // list, since there may be unused types in the `types' list michael@0: // corresponding to transitions that have been trimmed (during michael@0: // merging of final data). michael@0: michael@0: if (aliasTo >= 0) return; // Nothing to do for aliases michael@0: michael@0: if (!ICU44PLUS) { michael@0: // This is the old logic which has a bug, which occasionally removes michael@0: // the type before the first transition. The problem was fixed michael@0: // by inserting the dummy transition indirectly. michael@0: michael@0: // If there are zero transitions and one type, then leave that as-is. michael@0: if (transitions.size() == 0) { michael@0: if (types.size() != 1) { michael@0: cerr << "Error: transition count = 0, type count = " << types.size() << endl; michael@0: } michael@0: return; michael@0: } michael@0: michael@0: set simpleset; michael@0: for (vector::const_iterator i=transitions.begin(); michael@0: i!=transitions.end(); ++i) { michael@0: assert(i->type < (int32_t)types.size()); michael@0: simpleset.insert(types[i->type]); michael@0: } michael@0: michael@0: // Map types to integer indices michael@0: map simplemap; michael@0: int32_t n=0; michael@0: for (set::const_iterator i=simpleset.begin(); michael@0: i!=simpleset.end(); ++i) { michael@0: simplemap[*i] = n++; michael@0: } michael@0: michael@0: // Remap transitions michael@0: for (vector::iterator i=transitions.begin(); michael@0: i!=transitions.end(); ++i) { michael@0: assert(i->type < (int32_t)types.size()); michael@0: ZoneType oldtype = types[i->type]; michael@0: SimplifiedZoneType newtype(oldtype); michael@0: assert(simplemap.find(newtype) != simplemap.end()); michael@0: i->type = simplemap[newtype]; michael@0: } michael@0: michael@0: // Replace type list michael@0: types.clear(); michael@0: copy(simpleset.begin(), simpleset.end(), back_inserter(types)); michael@0: michael@0: } else { michael@0: if (types.size() > 1) { michael@0: // Note: localtime uses the very first non-dst type as initial offsets. michael@0: // If all types are DSTs, the very first type is treated as the initial offsets. michael@0: michael@0: // Decide a type used as the initial offsets. ICU put the type at index 0. michael@0: ZoneType initialType = types[0]; michael@0: for (vector::const_iterator i=types.begin(); i!=types.end(); ++i) { michael@0: if (i->dstoffset == 0) { michael@0: initialType = *i; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: SimplifiedZoneType initialSimplifiedType(initialType); michael@0: michael@0: // create a set of unique types, but ignoring fields which we're not interested in michael@0: set simpleset; michael@0: simpleset.insert(initialSimplifiedType); michael@0: for (vector::const_iterator i=transitions.begin(); i!=transitions.end(); ++i) { michael@0: assert(i->type < (int32_t)types.size()); michael@0: simpleset.insert(types[i->type]); michael@0: } michael@0: michael@0: // Map types to integer indices, however, keeping the first type at offset 0 michael@0: map simplemap; michael@0: simplemap[initialSimplifiedType] = 0; michael@0: int32_t n = 1; michael@0: for (set::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) { michael@0: if (*i < initialSimplifiedType || initialSimplifiedType < *i) { michael@0: simplemap[*i] = n++; michael@0: } michael@0: } michael@0: michael@0: // Remap transitions michael@0: for (vector::iterator i=transitions.begin(); michael@0: i!=transitions.end(); ++i) { michael@0: assert(i->type < (int32_t)types.size()); michael@0: ZoneType oldtype = types[i->type]; michael@0: SimplifiedZoneType newtype(oldtype); michael@0: assert(simplemap.find(newtype) != simplemap.end()); michael@0: i->type = simplemap[newtype]; michael@0: } michael@0: michael@0: // Replace type list michael@0: types.clear(); michael@0: types.push_back(initialSimplifiedType); michael@0: for (set::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) { michael@0: if (*i < initialSimplifiedType || initialSimplifiedType < *i) { michael@0: types.push_back(*i); michael@0: } michael@0: } michael@0: michael@0: // Reiterating transitions to remove any transitions which michael@0: // do not actually change the raw/dst offsets michael@0: int32_t prevTypeIdx = 0; michael@0: for (vector::iterator i=transitions.begin(); i!=transitions.end();) { michael@0: if (i->type == prevTypeIdx) { michael@0: // this is not a time transition, probably just name change michael@0: // e.g. America/Resolute after 2006 in 2010b michael@0: transitions.erase(i); michael@0: } else { michael@0: prevTypeIdx = i->type; michael@0: i++; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: } michael@0: michael@0: /** michael@0: * Merge final zone data into this zone. michael@0: */ michael@0: void ZoneInfo::mergeFinalData(const FinalZone& fz) { michael@0: int32_t year = fz.year; michael@0: int64_t seconds = yearToSeconds(year); michael@0: michael@0: if (!ICU44PLUS) { michael@0: if (seconds > HIGHEST_TIME32) { michael@0: // Avoid transitions beyond signed 32bit max second. michael@0: // This may result incorrect offset computation around michael@0: // HIGHEST_TIME32. This is a limitation of ICU michael@0: // before 4.4. michael@0: seconds = HIGHEST_TIME32; michael@0: } michael@0: } michael@0: michael@0: vector::iterator it = michael@0: find_if(transitions.begin(), transitions.end(), michael@0: bind2nd(ptr_fun(isAfter), seconds)); michael@0: transitions.erase(it, transitions.end()); michael@0: michael@0: if (finalYear != -1) { michael@0: throw invalid_argument("Final zone already merged in"); michael@0: } michael@0: finalYear = fz.year; michael@0: finalOffset = fz.offset; michael@0: finalRuleID = fz.ruleid; michael@0: } michael@0: michael@0: /** michael@0: * Merge the data from the given final zone into the core zone data by michael@0: * calling the ZoneInfo member function mergeFinalData. michael@0: */ michael@0: void mergeOne(const string& zoneid, const FinalZone& fz) { michael@0: if (ZONEINFO.find(zoneid) == ZONEINFO.end()) { michael@0: throw invalid_argument("Unrecognized final zone ID"); michael@0: } michael@0: ZONEINFO[zoneid].mergeFinalData(fz); michael@0: } michael@0: michael@0: /** michael@0: * Visitor function that merges the final zone data into the main zone michael@0: * data structures. It calls mergeOne for each final zone and its michael@0: * list of aliases. michael@0: */ michael@0: void mergeFinalZone(const pair& p) { michael@0: const string& id = p.first; michael@0: const FinalZone& fz = p.second; michael@0: michael@0: mergeOne(id, fz); michael@0: } michael@0: michael@0: /** michael@0: * Print this rule in resource bundle format to os. ID and enclosing michael@0: * braces handled elsewhere. michael@0: */ michael@0: void FinalRule::print(ostream& os) const { michael@0: // First print the rule part that enters DST; then the rule part michael@0: // that exits it. michael@0: int32_t whichpart = (part[0].offset != 0) ? 0 : 1; michael@0: assert(part[whichpart].offset != 0); michael@0: assert(part[1-whichpart].offset == 0); michael@0: michael@0: os << " "; michael@0: for (int32_t i=0; i<2; ++i) { michael@0: const FinalRulePart& p = part[whichpart]; michael@0: whichpart = 1-whichpart; michael@0: os << p.month << ", " << p.stz_dowim() << ", " << p.stz_dow() << ", " michael@0: << p.time << ", " << p.timemode() << ", "; michael@0: } michael@0: os << part[whichpart].offset << endl; michael@0: } michael@0: michael@0: int main(int argc, char *argv[]) { michael@0: string rootpath, zonetab, version; michael@0: bool validArgs = FALSE; michael@0: michael@0: if (argc == 4 || argc == 5) { michael@0: validArgs = TRUE; michael@0: rootpath = argv[1]; michael@0: zonetab = argv[2]; michael@0: version = argv[3]; michael@0: if (argc == 5) { michael@0: if (strcmp(argv[4], "--old") == 0) { michael@0: ICU44PLUS = FALSE; michael@0: TZ_RESOURCE_NAME = ICU_TZ_RESOURCE_OLD; michael@0: } else { michael@0: validArgs = FALSE; michael@0: } michael@0: } michael@0: } michael@0: if (!validArgs) { michael@0: cout << "Usage: tz2icu [--old]" << endl michael@0: << " path to zoneinfo file tree generated by" << endl michael@0: << " ICU-patched version of zic" << endl michael@0: << " country map, from tzdata archive," << endl michael@0: << " typically named \"zone.tab\"" << endl michael@0: << " version string, such as \"2003e\"" << endl michael@0: << " --old generating resource format before ICU4.4" << endl; michael@0: exit(1); michael@0: } michael@0: michael@0: cout << "Olson data version: " << version << endl; michael@0: cout << "ICU 4.4+ format: " << (ICU44PLUS ? "Yes" : "No") << endl; michael@0: michael@0: try { michael@0: ifstream finals(ICU_ZONE_FILE); michael@0: if (finals) { michael@0: readFinalZonesAndRules(finals); michael@0: michael@0: cout << "Finished reading " << finalZones.size() michael@0: << " final zones and " << finalRules.size() michael@0: << " final rules from " ICU_ZONE_FILE << endl; michael@0: } else { michael@0: cerr << "Error: Unable to open " ICU_ZONE_FILE << endl; michael@0: return 1; michael@0: } michael@0: } catch (const exception& error) { michael@0: cerr << "Error: While reading " ICU_ZONE_FILE ": " << error.what() << endl; michael@0: return 1; michael@0: } michael@0: michael@0: try { michael@0: // Recursively scan all files below the given path, accumulating michael@0: // their data into ZONEINFO. All files must be TZif files. Any michael@0: // failure along the way will result in a call to exit(1). michael@0: scandir(rootpath); michael@0: } catch (const exception& error) { michael@0: cerr << "Error: While scanning " << rootpath << ": " << error.what() << endl; michael@0: return 1; michael@0: } michael@0: michael@0: cout << "Finished reading " << ZONEINFO.size() << " zoneinfo files [" michael@0: << (ZONEINFO.begin())->first << ".." michael@0: << (--ZONEINFO.end())->first << "]" << endl; michael@0: michael@0: try { michael@0: for_each(finalZones.begin(), finalZones.end(), mergeFinalZone); michael@0: } catch (const exception& error) { michael@0: cerr << "Error: While merging final zone data: " << error.what() << endl; michael@0: return 1; michael@0: } michael@0: michael@0: // Process links (including ICU aliases). For each link set we have michael@0: // a canonical ID (e.g., America/Los_Angeles) and a set of one or more michael@0: // aliases (e.g., PST, PST8PDT, ...). michael@0: michael@0: // 1. Add all aliases as zone objects in ZONEINFO michael@0: for (map >::const_iterator i = links.begin(); michael@0: i!=links.end(); ++i) { michael@0: const string& olson = i->first; michael@0: const set& aliases = i->second; michael@0: if (ZONEINFO.find(olson) == ZONEINFO.end()) { michael@0: cerr << "Error: Invalid " << linkSource[olson] << " to non-existent \"" michael@0: << olson << "\"" << endl; michael@0: return 1; michael@0: } michael@0: for (set::const_iterator j=aliases.begin(); michael@0: j!=aliases.end(); ++j) { michael@0: ZONEINFO[*j] = ZoneInfo(); michael@0: } michael@0: } michael@0: michael@0: // 2. Create a mapping from zones to index numbers 0..n-1. michael@0: map zoneIDs; michael@0: vector zoneIDlist; michael@0: int32_t z=0; michael@0: for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { michael@0: zoneIDs[i->first] = z++; michael@0: zoneIDlist.push_back(i->first); michael@0: } michael@0: assert(z == (int32_t) ZONEINFO.size()); michael@0: michael@0: // 3. Merge aliases. Sometimes aliases link to other aliases; we michael@0: // resolve these into simplest possible sets. michael@0: map > links2; michael@0: map reverse2; michael@0: for (map >::const_iterator i = links.begin(); michael@0: i!=links.end(); ++i) { michael@0: string olson = i->first; michael@0: while (reverseLinks.find(olson) != reverseLinks.end()) { michael@0: olson = reverseLinks[olson]; michael@0: } michael@0: for (set::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { michael@0: links2[olson].insert(*j); michael@0: reverse2[*j] = olson; michael@0: } michael@0: } michael@0: links = links2; michael@0: reverseLinks = reverse2; michael@0: michael@0: if (false) { // Debugging: Emit link map michael@0: for (map >::const_iterator i = links.begin(); michael@0: i!=links.end(); ++i) { michael@0: cout << i->first << ": "; michael@0: for (set::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { michael@0: cout << *j << ", "; michael@0: } michael@0: cout << endl; michael@0: } michael@0: } michael@0: michael@0: // 4. Update aliases michael@0: for (map >::const_iterator i = links.begin(); michael@0: i!=links.end(); ++i) { michael@0: const string& olson = i->first; michael@0: const set& aliases = i->second; michael@0: ZONEINFO[olson].clearAliases(); michael@0: ZONEINFO[olson].addAlias(zoneIDs[olson]); michael@0: for (set::const_iterator j=aliases.begin(); michael@0: j!=aliases.end(); ++j) { michael@0: assert(zoneIDs.find(olson) != zoneIDs.end()); michael@0: assert(zoneIDs.find(*j) != zoneIDs.end()); michael@0: assert(ZONEINFO.find(*j) != ZONEINFO.end()); michael@0: ZONEINFO[*j].setAliasTo(zoneIDs[olson]); michael@0: ZONEINFO[olson].addAlias(zoneIDs[*j]); michael@0: } michael@0: } michael@0: michael@0: // Once merging of final data is complete, we can optimize the type list michael@0: for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { michael@0: i->second.optimizeTypeList(); michael@0: } michael@0: michael@0: // Create the country map michael@0: map > countryMap; // country -> set of zones michael@0: map reverseCountryMap; // zone -> country michael@0: try { michael@0: ifstream f(zonetab.c_str()); michael@0: if (!f) { michael@0: cerr << "Error: Unable to open " << zonetab << endl; michael@0: return 1; michael@0: } michael@0: int32_t n = 0; michael@0: string line; michael@0: while (getline(f, line)) { michael@0: string::size_type lb = line.find('#'); michael@0: if (lb != string::npos) { michael@0: line.resize(lb); // trim comments michael@0: } michael@0: string country, coord, zone; michael@0: istringstream is(line); michael@0: is >> country >> coord >> zone; michael@0: if (country.size() == 0) continue; michael@0: if (country.size() != 2 || zone.size() < 1) { michael@0: cerr << "Error: Can't parse " << line << " in " << zonetab << endl; michael@0: return 1; michael@0: } michael@0: if (ZONEINFO.find(zone) == ZONEINFO.end()) { michael@0: cerr << "Error: Country maps to invalid zone " << zone michael@0: << " in " << zonetab << endl; michael@0: return 1; michael@0: } michael@0: countryMap[country].insert(zone); michael@0: reverseCountryMap[zone] = country; michael@0: //cerr << (n+1) << ": " << country << " <=> " << zone << endl; michael@0: ++n; michael@0: } michael@0: cout << "Finished reading " << n michael@0: << " country entries from " << zonetab << endl; michael@0: } catch (const exception& error) { michael@0: cerr << "Error: While reading " << zonetab << ": " << error.what() << endl; michael@0: return 1; michael@0: } michael@0: michael@0: // Merge ICU aliases into country map. Don't merge any alias michael@0: // that already has a country map, since that doesn't make sense. michael@0: // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we michael@0: // should cross-map the countries between these two zones. michael@0: for (map >::const_iterator i = links.begin(); michael@0: i!=links.end(); ++i) { michael@0: const string& olson(i->first); michael@0: if (reverseCountryMap.find(olson) == reverseCountryMap.end()) { michael@0: continue; michael@0: } michael@0: string c = reverseCountryMap[olson]; michael@0: const set& aliases(i->second); michael@0: for (set::const_iterator j=aliases.begin(); michael@0: j != aliases.end(); ++j) { michael@0: if (reverseCountryMap.find(*j) == reverseCountryMap.end()) { michael@0: countryMap[c].insert(*j); michael@0: reverseCountryMap[*j] = c; michael@0: //cerr << "Aliased country: " << c << " <=> " << *j << endl; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // Create a pseudo-country containing all zones belonging to no country michael@0: set nocountry; michael@0: for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { michael@0: if (reverseCountryMap.find(i->first) == reverseCountryMap.end()) { michael@0: nocountry.insert(i->first); michael@0: } michael@0: } michael@0: countryMap[""] = nocountry; michael@0: michael@0: // Get local time & year for below michael@0: time_t sec; michael@0: time(&sec); michael@0: struct tm* now = localtime(&sec); michael@0: int32_t thisYear = now->tm_year + 1900; michael@0: michael@0: string filename = TZ_RESOURCE_NAME + ".txt"; michael@0: // Write out a resource-bundle source file containing data for michael@0: // all zones. michael@0: ofstream file(filename.c_str()); michael@0: if (file) { michael@0: file << "//---------------------------------------------------------" << endl michael@0: << "// Copyright (C) 2003"; michael@0: if (thisYear > 2003) { michael@0: file << "-" << thisYear; michael@0: } michael@0: file << ", International Business Machines" << endl michael@0: << "// Corporation and others. All Rights Reserved." << endl michael@0: << "//---------------------------------------------------------" << endl michael@0: << "// Build tool: tz2icu" << endl michael@0: << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */ michael@0: << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl michael@0: << "// Olson version: " << version << endl michael@0: << "// ICU version: " << U_ICU_VERSION << endl michael@0: << "//---------------------------------------------------------" << endl michael@0: << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl michael@0: << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl michael@0: << "//---------------------------------------------------------" << endl michael@0: << endl michael@0: << TZ_RESOURCE_NAME << ":table(nofallback) {" << endl michael@0: << " TZVersion { \"" << version << "\" }" << endl michael@0: << " Zones:array { " << endl michael@0: << ZONEINFO // Zones (the actual data) michael@0: << " }" << endl; michael@0: michael@0: // Names correspond to the Zones list, used for binary searching. michael@0: printStringList ( file, ZONEINFO ); // print the Names list michael@0: michael@0: // Final Rules are used if requested by the zone michael@0: file << " Rules { " << endl; michael@0: // Emit final rules michael@0: int32_t frc = 0; michael@0: for(map::iterator i=finalRules.begin(); michael@0: i!=finalRules.end(); ++i) { michael@0: const string& id = i->first; michael@0: const FinalRule& r = i->second; michael@0: file << " " << id << ":intvector {" << endl; michael@0: r.print(file); michael@0: file << " } //_#" << frc++ << endl; michael@0: } michael@0: file << " }" << endl; michael@0: michael@0: // Emit country (region) map. michael@0: if (ICU44PLUS) { michael@0: file << " Regions:array {" << endl; michael@0: int32_t zn = 0; michael@0: for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { michael@0: map::iterator cit = reverseCountryMap.find(i->first); michael@0: if (cit == reverseCountryMap.end()) { michael@0: file << " \"001\","; michael@0: } else { michael@0: file << " \"" << cit->second << "\", "; michael@0: } michael@0: file << "//Z#" << zn++ << " " << i->first << endl; michael@0: } michael@0: file << " }" << endl; michael@0: } else { michael@0: file << " Regions { " << endl; michael@0: int32_t rc = 0; michael@0: for (map >::const_iterator i=countryMap.begin(); michael@0: i != countryMap.end(); ++i) { michael@0: string country = i->first; michael@0: const set& zones(i->second); michael@0: file << " "; michael@0: if(country[0]==0) { michael@0: file << "Default"; michael@0: } michael@0: file << country << ":intvector { "; michael@0: bool first = true; michael@0: for (set::const_iterator j=zones.begin(); michael@0: j != zones.end(); ++j) { michael@0: if (!first) file << ", "; michael@0: first = false; michael@0: if (zoneIDs.find(*j) == zoneIDs.end()) { michael@0: cerr << "Error: Nonexistent zone in country map: " << *j << endl; michael@0: return 1; michael@0: } michael@0: file << zoneIDs[*j]; // emit the zone's index number michael@0: } michael@0: file << " } //R#" << rc++ << endl; michael@0: } michael@0: file << " }" << endl; michael@0: } michael@0: michael@0: file << "}" << endl; michael@0: } michael@0: michael@0: file.close(); michael@0: michael@0: if (file) { // recheck error bit michael@0: cout << "Finished writing " << TZ_RESOURCE_NAME << ".txt" << endl; michael@0: } else { michael@0: cerr << "Error: Unable to open/write to " << TZ_RESOURCE_NAME << ".txt" << endl; michael@0: return 1; michael@0: } michael@0: } michael@0: //eof