[BACK]Return to ziguard.awk CVS log [TXT][DIR] Up to [local] / src / share / zoneinfo

Annotation of src/share/zoneinfo/ziguard.awk, Revision 1.1

1.1     ! millert     1: # Convert tzdata source into vanguard or rearguard form.
        !             2:
        !             3: # Contributed by Paul Eggert.  This file is in the public domain.
        !             4:
        !             5: # This is not a general-purpose converter; it is designed for current tzdata.
        !             6: # It just converts from current source to main, vanguard, and rearguard forms.
        !             7: # Although it might be nice for it to be idempotent, or to be useful
        !             8: # for converting back and forth between vanguard and rearguard formats,
        !             9: # it does not do these nonessential tasks now.
        !            10: #
        !            11: # Although main and vanguard forms are currently equivalent,
        !            12: # this need not always be the case.  When the two forms differ,
        !            13: # this script can convert either from main to vanguard form (needed then),
        !            14: # or from vanguard to main form (this conversion would be needed later,
        !            15: # after main became rearguard and vanguard became main).
        !            16: # There is no need to convert rearguard to other forms.
        !            17: #
        !            18: # When converting to vanguard form, the output can use the line
        !            19: # "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
        !            20: #
        !            21: # When converting to vanguard form, the output can use negative SAVE
        !            22: # values.
        !            23: #
        !            24: # When converting to rearguard form, the output uses only nonnegative
        !            25: # SAVE values.  The idea is for the output data to simulate the behavior
        !            26: # of the input data as best it can within the constraints of the
        !            27: # rearguard format.
        !            28:
        !            29: # Given a FIELD like "-0:30", return a minute count like -30.
        !            30: function get_minutes(field, \
        !            31:                     sign, hours, minutes)
        !            32: {
        !            33:   sign = field ~ /^-/ ? -1 : 1
        !            34:   hours = +field
        !            35:   if (field ~ /:/) {
        !            36:     minutes = field
        !            37:     sub(/[^:]*:/, "", minutes)
        !            38:   }
        !            39:   return 60 * hours + sign * minutes
        !            40: }
        !            41:
        !            42: # Given an OFFSET, which is a minute count like 300 or 330,
        !            43: # return a %z-style abbreviation like "+05" or "+0530".
        !            44: function offset_abbr(offset, \
        !            45:                     hours, minutes, sign)
        !            46: {
        !            47:   hours = int(offset / 60)
        !            48:   minutes = offset % 60
        !            49:   if (minutes) {
        !            50:     return sprintf("%+.4d", hours * 100 + minutes);
        !            51:   } else {
        !            52:     return sprintf("%+.2d", hours)
        !            53:   }
        !            54: }
        !            55:
        !            56: # Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
        !            57: function round_to_second(timestamp, \
        !            58:                         hh, mm, ss, seconds, dot_dddd, subseconds)
        !            59: {
        !            60:   dot_dddd = timestamp
        !            61:   if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
        !            62:     return timestamp
        !            63:   hh = mm = ss = timestamp
        !            64:   sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
        !            65:   sub(/^[-+]?[0-9]+:/, "", mm)
        !            66:   sub(/^[-+]?/, "", hh)
        !            67:   seconds = 3600 * hh + 60 * mm + ss
        !            68:   subseconds = +dot_dddd
        !            69:   seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
        !            70:   return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
        !            71:                 seconds / 3600, seconds / 60 % 60, seconds % 60)
        !            72: }
        !            73:
        !            74: BEGIN {
        !            75:   dataform_type["vanguard"] = 1
        !            76:   dataform_type["main"] = 1
        !            77:   dataform_type["rearguard"] = 1
        !            78:
        !            79:   if (PACKRATLIST) {
        !            80:     while (getline <PACKRATLIST) {
        !            81:       if ($0 ~ /^#/) continue
        !            82:       packratlist[$3] = 1
        !            83:     }
        !            84:   }
        !            85:
        !            86:   # The command line should set DATAFORM.
        !            87:   if (!dataform_type[DATAFORM]) exit 1
        !            88: }
        !            89:
        !            90: $1 == "#PACKRATLIST" && $2 == PACKRATLIST {
        !            91:   sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
        !            92: }
        !            93:
        !            94: /^Zone/ { zone = $2 }
        !            95:
        !            96: DATAFORM != "main" {
        !            97:   in_comment = $0 ~ /^#/
        !            98:   uncomment = comment_out = 0
        !            99:
        !           100:   # If this line should differ due to Czechoslovakia using negative SAVE values,
        !           101:   # uncomment the desired version and comment out the undesired one.
        !           102:   if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
        !           103:       && $0 ~ /1947 Feb 23/) {
        !           104:     if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
        !           105:       uncomment = in_comment
        !           106:     } else {
        !           107:       comment_out = !in_comment
        !           108:     }
        !           109:   }
        !           110:
        !           111:   # If this line should differ due to Ireland using negative SAVE values,
        !           112:   # uncomment the desired version and comment out the undesired one.
        !           113:   Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
        !           114:   Zone_Dublin_post_1968 \
        !           115:     = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
        !           116:        && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
        !           117:   if (Rule_Eire || Zone_Dublin_post_1968) {
        !           118:     if ((Rule_Eire \
        !           119:         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT"))  \
        !           120:        == (DATAFORM != "rearguard")) {
        !           121:       uncomment = in_comment
        !           122:     } else {
        !           123:       comment_out = !in_comment
        !           124:     }
        !           125:   }
        !           126:
        !           127:   # If this line should differ due to Namibia using negative SAVE values,
        !           128:   # uncomment the desired version and comment out the undesired one.
        !           129:   Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
        !           130:   Zone_using_Namibia_rule \
        !           131:     = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
        !           132:        && ($(in_comment + 2) == "Namibia" \
        !           133:           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
        !           134:               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
        !           135:                   || in_comment + 3 == NF))))
        !           136:   if (Rule_Namibia || Zone_using_Namibia_rule) {
        !           137:     if ((Rule_Namibia \
        !           138:         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
        !           139:         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        !           140:        == (DATAFORM != "rearguard")) {
        !           141:       uncomment = in_comment
        !           142:     } else {
        !           143:       comment_out = !in_comment
        !           144:     }
        !           145:   }
        !           146:
        !           147:   # If this line should differ due to Portugal benefiting from %z if supported,
        !           148:   # uncomment the desired version and comment out the undesired one.
        !           149:   if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
        !           150:     if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
        !           151:       uncomment = in_comment
        !           152:     } else {
        !           153:       comment_out = !in_comment
        !           154:     }
        !           155:   }
        !           156:
        !           157:   # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
        !           158:   # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
        !           159:   # This works around a bug in TZUpdater 2.3.2.
        !           160:   if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
        !           161:     if (($2 == "GMT") == (DATAFORM == "vanguard")) {
        !           162:       uncomment = in_comment
        !           163:     } else {
        !           164:       comment_out = !in_comment
        !           165:     }
        !           166:   }
        !           167:
        !           168:   if (uncomment) {
        !           169:     sub(/^#/, "")
        !           170:   }
        !           171:   if (comment_out) {
        !           172:     sub(/^/, "#")
        !           173:   }
        !           174:
        !           175:   # Prefer %z in vanguard form, explicit abbreviations otherwise.
        !           176:   if (DATAFORM == "vanguard") {
        !           177:     sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        !           178:        "&CHANGE-TO-%z")
        !           179:     sub(/-00CHANGE-TO-%z/, "-00")
        !           180:     sub(/[-+][^\t ]+CHANGE-TO-/, "")
        !           181:   } else {
        !           182:     if ($0 ~ /^[^#]*%z/) {
        !           183:       stdoff_column = 2 * ($0 ~ /^Zone/) + 1
        !           184:       rules_column = stdoff_column + 1
        !           185:       stdoff = get_minutes($stdoff_column)
        !           186:       rules = $rules_column
        !           187:       stdabbr = offset_abbr(stdoff)
        !           188:       if (rules == "-") {
        !           189:        abbr = stdabbr
        !           190:       } else {
        !           191:        dstabbr_only = rules ~ /^[+0-9-]/
        !           192:        if (dstabbr_only) {
        !           193:          dstoff = get_minutes(rules)
        !           194:        } else {
        !           195:          # The DST offset is normally an hour, but there are special cases.
        !           196:          if (rules == "Morocco" && NF == 3) {
        !           197:            dstoff = -60
        !           198:          } else if (rules == "NBorneo") {
        !           199:            dstoff = 20
        !           200:          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
        !           201:                     || (rules == "Uruguay" \
        !           202:                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
        !           203:            dstoff = 30
        !           204:          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
        !           205:            dstoff = 90
        !           206:          } else {
        !           207:            dstoff = 60
        !           208:          }
        !           209:        }
        !           210:        dstabbr = offset_abbr(stdoff + dstoff)
        !           211:        if (dstabbr_only) {
        !           212:          abbr = dstabbr
        !           213:        } else {
        !           214:          abbr = stdabbr "/" dstabbr
        !           215:        }
        !           216:       }
        !           217:       sub(/%z/, abbr)
        !           218:     }
        !           219:   }
        !           220:
        !           221:   # Normally, prefer whole seconds.  However, prefer subseconds
        !           222:   # if generating vanguard form and the otherwise-undocumented
        !           223:   # VANGUARD_SUBSECONDS environment variable is set.
        !           224:   # This relies on #STDOFF comment lines in the data.
        !           225:   # It is for hypothetical clients that support UT offsets that are
        !           226:   # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
        !           227:   # No known clients need this currently, and this experimental
        !           228:   # feature may be changed or withdrawn in future releases.
        !           229:   if ($1 == "#STDOFF") {
        !           230:     stdoff = $2
        !           231:     rounded_stdoff = round_to_second(stdoff)
        !           232:     if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
        !           233:       stdoff_subst[0] = rounded_stdoff
        !           234:       stdoff_subst[1] = stdoff
        !           235:     } else {
        !           236:       stdoff_subst[0] = stdoff
        !           237:       stdoff_subst[1] = rounded_stdoff
        !           238:     }
        !           239:   } else if (stdoff_subst[0]) {
        !           240:     stdoff_column = 2 * ($0 ~ /^Zone/) + 1
        !           241:     stdoff_column_val = $stdoff_column
        !           242:     if (stdoff_column_val == stdoff_subst[0]) {
        !           243:       sub(stdoff_subst[0], stdoff_subst[1])
        !           244:     } else if (stdoff_column_val != stdoff_subst[1]) {
        !           245:       stdoff_subst[0] = 0
        !           246:     }
        !           247:   }
        !           248:
        !           249:   # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
        !           250:   # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
        !           251:   if ($0 ~ /^Rule/ && $2 == "Japan") {
        !           252:     if (DATAFORM == "rearguard") {
        !           253:       if ($7 == "Sat>=8" && $8 == "25:00") {
        !           254:        sub(/Sat>=8/, "Sun>=9")
        !           255:        sub(/25:00/, " 1:00")
        !           256:       }
        !           257:     } else {
        !           258:       if ($7 == "Sun>=9" && $8 == "1:00") {
        !           259:        sub(/Sun>=9/, "Sat>=8")
        !           260:        sub(/ 1:00/, "25:00")
        !           261:       }
        !           262:     }
        !           263:   }
        !           264:
        !           265:   # In rearguard form, change the Morocco lines with negative SAVE values
        !           266:   # to use positive SAVE values.
        !           267:   if ($2 == "Morocco") {
        !           268:     if ($0 ~ /^Rule/) {
        !           269:       if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        !           270:        if (DATAFORM == "rearguard") {
        !           271:          sub(/\t2018\t/, "\t2017\t")
        !           272:        } else {
        !           273:          sub(/\t2017\t/, "\t2018\t")
        !           274:        }
        !           275:       }
        !           276:
        !           277:       if (2019 <= $3) {
        !           278:        if ($8 == "2:00") {
        !           279:          if (DATAFORM == "rearguard") {
        !           280:            sub(/\t0\t/, "\t1:00\t")
        !           281:          } else {
        !           282:            sub(/\t1:00\t/, "\t0\t")
        !           283:          }
        !           284:        } else {
        !           285:          if (DATAFORM == "rearguard") {
        !           286:            sub(/\t-1:00\t/, "\t0\t")
        !           287:          } else {
        !           288:            sub(/\t0\t/, "\t-1:00\t")
        !           289:          }
        !           290:        }
        !           291:       }
        !           292:     }
        !           293:     if ($1 ~ /^[+0-9-]/ && NF == 3) {
        !           294:       if (DATAFORM == "rearguard") {
        !           295:        sub(/1:00\tMorocco/, "0:00\tMorocco")
        !           296:        sub(/\t\+01\/\+00$/, "\t+00/+01")
        !           297:       } else {
        !           298:        sub(/0:00\tMorocco/, "1:00\tMorocco")
        !           299:        sub(/\t\+00\/+01$/, "\t+01/+00")
        !           300:       }
        !           301:     }
        !           302:   }
        !           303: }
        !           304:
        !           305: /^Zone/ {
        !           306:   packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
        !           307: }
        !           308: {
        !           309:   if (packrat_ignored && $0 !~ /^Rule/) {
        !           310:     sub(/^/, "#")
        !           311:   }
        !           312: }
        !           313:
        !           314: # Return a link line resulting by changing OLDLINE to link to TARGET
        !           315: # from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
        !           316: # Align data columns the same as they were in OLDLINE.
        !           317: # Also, replace any existing white space followed by comment with COMMENT.
        !           318: function make_linkline(oldline, target, linkname, oldtarget, comment, \
        !           319:                       oldprefix, oldprefixlen, oldtargettabs, \
        !           320:                       replsuffix, targettabs)
        !           321: {
        !           322:   oldprefix = "Link\t" oldtarget "\t"
        !           323:   oldprefixlen = length(oldprefix)
        !           324:   if (substr(oldline, 1, oldprefixlen) == oldprefix) {
        !           325:     # Use tab stops to preserve LINKNAME's column.
        !           326:     replsuffix = substr(oldline, oldprefixlen + 1)
        !           327:     sub(/[\t ]*#.*/, "", replsuffix)
        !           328:     oldtargettabs = int(length(oldtarget) / 8) + 1
        !           329:     targettabs = int(length(target) / 8) + 1
        !           330:     for (; targettabs < oldtargettabs; targettabs++) {
        !           331:       replsuffix = "\t" replsuffix
        !           332:     }
        !           333:     for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
        !           334:       replsuffix = substr(replsuffix, 2)
        !           335:     }
        !           336:   } else {
        !           337:     # Odd format line; don't bother lining up its replacement nicely.
        !           338:     replsuffix = linkname
        !           339:   }
        !           340:   return "Link\t" target "\t" replsuffix comment
        !           341: }
        !           342:
        !           343: /^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
        !           344:   $0 = make_linkline($0, $5, $3, $2)
        !           345: }
        !           346:
        !           347: # If a Link line is followed by a Link or Zone line for the same data, comment
        !           348: # out the Link line.  This can happen if backzone overrides a Link
        !           349: # with a Zone or a different Link.
        !           350: /^Zone/ {
        !           351:   sub(/^Link/, "#Link", line[linkline[$2]])
        !           352: }
        !           353: /^Link/ {
        !           354:   sub(/^Link/, "#Link", line[linkline[$3]])
        !           355:   linkline[$3] = NR
        !           356:   linktarget[$3] = $2
        !           357: }
        !           358:
        !           359: { line[NR] = $0 }
        !           360:
        !           361: function cut_link_chains_short( \
        !           362:                               l, linkname, t, target)
        !           363: {
        !           364:   for (linkname in linktarget) {
        !           365:     target = linktarget[linkname]
        !           366:     t = linktarget[target]
        !           367:     if (t) {
        !           368:       # TARGET is itself a link name.  Replace the line "Link TARGET LINKNAME"
        !           369:       # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
        !           370:       # of links that LINKNAME points to.
        !           371:       while ((u = linktarget[t])) {
        !           372:        t = u
        !           373:       }
        !           374:       l = linkline[linkname]
        !           375:       line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
        !           376:     }
        !           377:   }
        !           378: }
        !           379:
        !           380: END {
        !           381:   if (DATAFORM != "vanguard") {
        !           382:     cut_link_chains_short()
        !           383:   }
        !           384:   for (i = 1; i <= NR; i++)
        !           385:     print line[i]
        !           386: }