Annotation of src/share/zoneinfo/ziguard.awk, Revision 1.1
1.1 ! millert 1: # Convert tzdata source into vanguard or rearguard form.
! 2:
! 3: # Contributed by Paul Eggert. This file is in the public domain.
! 4:
! 5: # This is not a general-purpose converter; it is designed for current tzdata.
! 6: # It just converts from current source to main, vanguard, and rearguard forms.
! 7: # Although it might be nice for it to be idempotent, or to be useful
! 8: # for converting back and forth between vanguard and rearguard formats,
! 9: # it does not do these nonessential tasks now.
! 10: #
! 11: # Although main and vanguard forms are currently equivalent,
! 12: # this need not always be the case. When the two forms differ,
! 13: # this script can convert either from main to vanguard form (needed then),
! 14: # or from vanguard to main form (this conversion would be needed later,
! 15: # after main became rearguard and vanguard became main).
! 16: # There is no need to convert rearguard to other forms.
! 17: #
! 18: # When converting to vanguard form, the output can use the line
! 19: # "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
! 20: #
! 21: # When converting to vanguard form, the output can use negative SAVE
! 22: # values.
! 23: #
! 24: # When converting to rearguard form, the output uses only nonnegative
! 25: # SAVE values. The idea is for the output data to simulate the behavior
! 26: # of the input data as best it can within the constraints of the
! 27: # rearguard format.
! 28:
# Convert FIELD, an [-]h[:mm] string such as "-0:30" or "5", into a signed
# count of minutes (-30 and 300 for those examples).  The sign is taken
# from a leading "-" so that inputs like "-0:30", whose hour part is zero,
# still come out negative.
function get_minutes(field, \
		     sign, hours, minutes)
{
  # Numeric conversion stops at the first ":", so this is the hour part.
  hours = field + 0

  # Everything after the first colon is the minute part; none means zero.
  minutes = 0
  if (index(field, ":")) {
    minutes = field
    sub(/^[^:]*:/, "", minutes)
  }

  if (substr(field, 1, 1) == "-")
    return 60 * hours - minutes
  return 60 * hours + minutes
}
! 41:
# Format OFFSET, a signed minute count such as 300 or 330, as a %z-style
# abbreviation: "+hh" when the offset is a whole number of hours (e.g.
# "+05"), otherwise "+hhmm" (e.g. "+0530").  The sign is always printed.
function offset_abbr(offset, \
		     hours, minutes, sign)
{
  minutes = offset % 60
  hours = int(offset / 60)

  # Whole-hour offsets use the short two-digit form.
  if (!minutes)
    return sprintf("%+.2d", hours)

  # Pack hours and minutes into one signed number so a single sign is
  # printed and both parts are zero-padded together.
  return sprintf("%+.4d", 100 * hours + minutes)
}
! 55:
# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second and
# return it as "[-]h:mm:ss".  Exact half-second ties round to the even
# second.  A TIMESTAMP without a fractional part (or not in hh:mm:ss.dddd
# form at all) is returned unchanged.
function round_to_second(timestamp, \
			 hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Reduce TIMESTAMP to its ".dddd" tail; if that fails there is no
  # fraction to round.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Strip leading components so that numeric conversion of each variable
  # yields the hours, minutes and seconds respectively.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # SS still carries its fraction ("44.68"), so truncate it: the tie test
  # below needs a whole number of seconds, otherwise seconds % 2 is never
  # zero on a tie and halves would always round up.
  seconds = 3600 * hh + 60 * mm + int(ss)
  subseconds = +dot_dddd
  if (0.5 < subseconds || (subseconds == 0.5 && seconds % 2))
    seconds++

  # The magnitude was computed unsigned; reattach the sign here.
  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
		 int(seconds / 3600), int(seconds / 60) % 60, seconds % 60)
}
! 73:
# Startup: record the valid DATAFORM values, load the optional packrat
# list, and reject an unknown DATAFORM.
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  # PACKRATLIST, if set, names a file; the third field of each of its
  # non-comment lines is recorded as a key of packratlist[].
  if (PACKRATLIST) {
    # getline returns 1 on success, 0 at end of file and -1 on error.
    # Testing "> 0" keeps an unreadable file from looping forever.
    while ((packratlist_status = (getline <PACKRATLIST)) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
    close(PACKRATLIST)
    if (packratlist_status < 0) {
      print "ziguard.awk: cannot read " PACKRATLIST | "cat 1>&2"
      exit 1
    }
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}
! 89:
# A line of the form "#PACKRATLIST <file> <rest>" whose <file> equals the
# PACKRATLIST variable is activated: the marker and file name are stripped,
# leaving only <rest> for the rules that follow.
$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "", $0)
}

# Remember the name of the most recent Zone line; later rules use it to
# recognize that zone's continuation lines.
/^Zone/ {
  zone = $2
}
! 95:
# Main conversion rule, applied to every input line unless DATAFORM is
# "main".  It (1) chooses between alternative lines that the data carries
# in commented and uncommented form, (2) converts between %z and explicit
# abbreviations, (3) substitutes rounded or subsecond STDOFF values, and
# (4) rewrites the Japan and Morocco lines that differ between forms.
DATAFORM != "main" {
  # in_comment is 1 for a "#" line, so $(in_comment + K) addresses the
  # same logical field whether or not the line is commented out.
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  # This works around a bug in TZUpdater 2.3.2.
  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply whichever decision the tests above reached.
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a numeric FORMAT field with a marker, drop the marker again for
    # "-00" (which must stay literal), then replace the tagged numeric
    # abbreviation with %z.
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 on a continuation line.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        # A RULES field that starts with a sign or digit is a fixed SAVE
        # amount, so only the DST abbreviation applies.
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    # stdoff_subst[0] is the text to look for, stdoff_subst[1] its
    # replacement.
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      # NOTE(review): stdoff_subst[0] is used as a dynamic regex here, so
      # any "." in it matches any character and the first match in the
      # line is the one replaced.
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # The STDOFF column matches neither form: stop substituting.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both "+" signs must be escaped; an unescaped "+" after "\/"
        # would mean "one or more slashes" and never match "+00/+01".
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}
! 304:
# On each Zone line, decide whether the zone is to be ignored: it is when
# the line comes from the PACKRATDATA file, a PACKRATLIST was given, and
# the zone name is not a key of packratlist[].
/^Zone/ {
  packrat_ignored = (FILENAME == PACKRATDATA && PACKRATLIST \
                     && !packratlist[$2])
}

# While an ignored zone is in effect, comment out every line except Rule
# lines.
packrat_ignored && !/^Rule/ {
  $0 = "#" $0
}
! 313:
# Build the Link line that results from changing OLDLINE so that LINKNAME
# links to TARGET instead of to OLDTARGET.  When OLDLINE has the usual
# "Link<TAB>OLDTARGET<TAB>..." shape, tabs are added or removed so that
# LINKNAME stays in the column it occupied in OLDLINE.  Any trailing
# white space plus "#" comment in OLDLINE is dropped and COMMENT (possibly
# empty) is appended in its place.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
		       oldprefix, oldprefixlen, oldtargettabs, \
		       replsuffix, targettabs)
{
  oldprefix = "Link\t" oldtarget "\t"
  oldprefixlen = length(oldprefix)

  # Odd format line; don't bother lining up its replacement nicely.
  if (substr(oldline, 1, oldprefixlen) != oldprefix)
    return "Link\t" target "\t" linkname comment

  # Keep what followed the old target, minus any trailing comment.
  replsuffix = substr(oldline, oldprefixlen + 1)
  sub(/[\t ]*#.*/, "", replsuffix)

  # Number of 8-column tab stops each target name reaches into.
  oldtargettabs = int(length(oldtarget) / 8) + 1
  targettabs = int(length(target) / 8) + 1

  # A shorter new target needs extra tabs to reach LINKNAME's old column.
  while (targettabs < oldtargettabs) {
    replsuffix = "\t" replsuffix
    targettabs++
  }
  # A longer new target gives tabs back, as far as any are available.
  while (oldtargettabs < targettabs && substr(replsuffix, 1, 1) == "\t") {
    replsuffix = substr(replsuffix, 2)
    targettabs--
  }

  return "Link\t" target "\t" replsuffix comment
}
! 342:
# In vanguard form, a line "Link TARGET LINKNAME #= ORIGTARGET" is
# rewritten to link to ORIGTARGET (field 5) instead of TARGET (field 2).
DATAFORM == "vanguard" && /^Link/ && $4 == "#=" {
  $0 = make_linkline($0, $5, $3, $2)
}

# If a Link line is followed by a Link or Zone line for the same name,
# comment out the earlier Link line in the saved output.  This can happen
# if backzone overrides a Link with a Zone or a different Link.
/^Zone/ {
  sub(/^Link/, "#Link", line[linkline[$2]])
}

/^Link/ {
  # Comment out any earlier Link for this name before recording this one.
  sub(/^Link/, "#Link", line[linkline[$3]])
  linktarget[$3] = $2
  linkline[$3] = NR
}

# Buffer every (possibly modified) line; output happens in the END action.
{
  line[NR] = $0
}
! 360:
# For every link whose target is itself a link name, rewrite the saved
# line "Link TARGET LINKNAME" as "Link T LINKNAME #= TARGET", where T is
# the name at the end of the chain of links that LINKNAME points to.
# Reads the global arrays linktarget[] and linkline[] and updates line[].
# NOTE(review): a cycle of links would make the chain walk loop forever;
# this assumes the input data contains none.
function cut_link_chains_short( \
			       l, linkname, t, target, u)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]

    # Test membership with "in" rather than by subscripting, so that no
    # elements are created in linktarget[] while it is being traversed.
    if (!(target in linktarget))
      continue
    t = linktarget[target]
    if (!t)
      continue

    # TARGET is itself a link name; follow the chain to its end.
    while ((t in linktarget) && (u = linktarget[t])) {
      t = u
    }
    l = linkline[linkname]
    line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
  }
}
! 379:
# After all input has been read: except in vanguard form, shorten chains
# of links, then print the buffered lines in input order.
END {
  if (DATAFORM != "vanguard")
    cut_link_chains_short()

  i = 1
  while (i <= NR) {
    print line[i]
    i++
  }
}