Annotation of src/share/zoneinfo/zishrink.awk, Revision 1.1
1.1 ! millert 1: # Convert tzdata source into a smaller version of itself.
! 2:
! 3: # Contributed by Paul Eggert. This file is in the public domain.
! 4:
! 5: # This is not a general-purpose converter; it is designed for current tzdata.
! 6: # 'zic' should treat this script's output as if it were identical to
! 7: # this script's input.
! 8:
# Register abbreviation N as standing for the rule name NAME.
# used_hashes[] maps each abbreviation already handed out to the rule
# name that owns it.  If N is already owned, print a "# ! collision"
# comment line naming the previous owner and NAME, then execute "exit 1".

function record_hash(n, name)
{
  if (!used_hashes[n]) {
    # First use of this abbreviation: claim it.
    used_hashes[n] = name
    return
  }
  printf "# ! collision: %s %s\n", used_hashes[n], name
  exit 1
}
! 19:
# Build an abbreviation for the rule called NAME, register the pairing
# through record_hash (which aborts on a collision), and return the
# abbreviation.

function gen_rule_name(name, \
		       abbr)
{
  # The mnemonic is simply the first two characters of NAME.
  abbr = substr(name, 1, 2)
  record_hash(abbr, name)
  # For debugging: printf "# %s = %s\n", abbr, name
  return abbr
}
! 32:
# Fill rule[] with the fixed table of rule-name abbreviations and
# register every abbreviation with record_hash so that later,
# automatically generated abbreviations cannot silently clash with them.

function prehash_rule_names( \
			    long_name)
{
  # Rule names are not part of the tzdb API, so shorter ones can be
  # substituted freely.  Keeping the substitutions stable from one
  # release to the next makes the output easier to compare, but the
  # one-letter names below are not standardized in any way and may
  # change arbitrarily between releases: the main goal here is
  # compression, not comparison.

  # One-letter abbreviations for the rule names where this saved the
  # most space circa 2018e.
  rule["Arg"] = "A"
  rule["Brazil"] = "B"
  rule["Canada"] = "C"
  rule["Denmark"] = "D"
  rule["EU"] = "E"
  rule["France"] = "F"
  rule["GB-Eire"] = "G"
  rule["Halifax"] = "H"
  rule["Italy"] = "I"
  rule["Jordan"] = "J"
  rule["Egypt"] = "K" # "Kemet" in ancient Egyptian
  rule["Libya"] = "L"
  rule["Morocco"] = "M"
  rule["Neth"] = "N"
  rule["Poland"] = "O" # arbitrary
  rule["Palestine"] = "P"
  rule["Cuba"] = "Q" # Its start sounds like "Q".
  rule["Russia"] = "R"
  rule["Syria"] = "S"
  rule["Turkey"] = "T"
  rule["Uruguay"] = "U"
  rule["Vincennes"] = "V"
  rule["Winn"] = "W"
  rule["Mongol"] = "X" # arbitrary
  rule["NT_YK"] = "Y"
  rule["Zion"] = "Z"
  rule["Austria"] = "a"
  rule["Belgium"] = "b"
  rule["C-Eur"] = "c"
  rule["Algeria"] = "d" # country code DZ
  rule["E-Eur"] = "e"
  rule["Taiwan"] = "f" # Formosa
  rule["Greece"] = "g"
  rule["Hungary"] = "h"
  rule["Iran"] = "i"
  rule["StJohns"] = "j"
  rule["Chatham"] = "k" # arbitrary
  rule["Lebanon"] = "l"
  rule["Mexico"] = "m"
  rule["Tunisia"] = "n" # country code TN
  rule["Moncton"] = "o" # arbitrary
  rule["Port"] = "p"
  rule["Albania"] = "q" # arbitrary
  rule["Regina"] = "r"
  rule["Spain"] = "s"
  rule["Toronto"] = "t"
  rule["US"] = "u"
  rule["Louisville"] = "v" # ville
  rule["Iceland"] = "w" # arbitrary
  rule["Chile"] = "x" # arbitrary
  rule["Para"] = "y" # country code PY
  rule["Romania"] = "z" # arbitrary
  rule["Macau"] = "_" # arbitrary

  # Remaining names that denote countries get two-letter codes modeled
  # on ISO 3166 alpha-2.  This is more systematic and avoids collisions
  # (e.g., Malta and Moldova).
  rule["Armenia"] = "AM"
  rule["Aus"] = "AU"
  rule["Azer"] = "AZ"
  rule["Barb"] = "BB"
  rule["Dhaka"] = "BD"
  rule["Bulg"] = "BG"
  rule["Bahamas"] = "BS"
  rule["Belize"] = "BZ"
  rule["Swiss"] = "CH"
  rule["Cook"] = "CK"
  rule["PRC"] = "CN"
  rule["Cyprus"] = "CY"
  rule["Czech"] = "CZ"
  rule["Germany"] = "DE"
  rule["DR"] = "DO"
  rule["Ecuador"] = "EC"
  rule["Finland"] = "FI"
  rule["Fiji"] = "FJ"
  rule["Falk"] = "FK"
  rule["Ghana"] = "GH"
  rule["Guat"] = "GT"
  rule["Hond"] = "HN"
  rule["Haiti"] = "HT"
  rule["Eire"] = "IE"
  rule["Iraq"] = "IQ"
  rule["Japan"] = "JP"
  rule["Kyrgyz"] = "KG"
  rule["ROK"] = "KR"
  rule["Latvia"] = "LV"
  rule["Lux"] = "LX" # NOTE(review): the ISO code is LU; "LX" kept as-is
  rule["Moldova"] = "MD"
  rule["Malta"] = "MT"
  rule["Mauritius"] = "MU"
  rule["Namibia"] = "NA"
  rule["Nic"] = "NI"
  rule["Norway"] = "NO"
  rule["Peru"] = "PE"
  rule["Phil"] = "PH"
  rule["Pakistan"] = "PK"
  rule["Sudan"] = "SD"
  rule["Salv"] = "SV"
  rule["Tonga"] = "TO"
  rule["Vanuatu"] = "VU"

  # Explicit entries that sidestep a collision among generated names.
  rule["Detroit"] = "Dt" # De = Denver

  # Claim every abbreviation in the table; record_hash aborts if two
  # rule names were given the same abbreviation.
  for (long_name in rule)
    record_hash(rule[long_name], long_name)
}
! 152:
# Join FIELD[1] .. FIELD[N] into one string, separating consecutive
# fields with a single space, and return it.  When N is less than 2 the
# result is just FIELD[1] (the empty string if that element is unset).

function make_line(n, field, \
		   idx, joined)
{
  joined = field[1]
  idx = 1
  while (++idx <= n)
    joined = joined " " field[idx]
  return joined
}
! 161:
# Process the input line LINE and save it for later output.
#
# LINE is shortened textually (comments dropped, whitespace collapsed,
# keywords, times, weekday and month names abbreviated, redundant
# trailing fields removed), and the result is appended to output_line[]
# at index nout.  Side effects on globals:
#   rule_used[R]   set to 1 for each rule name R referenced by a Zone
#                  line or by a zone continuation line;
#   zonedef[NAME]  1 + the output_line[] index of the Zone or Link line
#                  that most recently defined NAME;
#   zonename       name defined by the most recent Zone/Link line, or ""
#                  after a Rule line;
#   output_line[], nout  the saved lines and their count.
# Rule lines whose name ends in "Asia" preceded by another character are
# dropped entirely (nothing is saved for them).

function process_input_line(line, \
			    f, field, end, i, n, r, startdef, \
			    linkline, ruleline, zoneline)
{
  # Remove comments, normalize spaces, and append a space to each line.
  # The appended space lets the patterns below match a trailing field
  # the same way as an interior one.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[\t ]+/, " ", line)

  # Abbreviate keywords and determine line type.  sub() returns 1 when
  # it made a replacement, so each variable doubles as a type flag.
  linkline = sub(/^Link /, "L ", line)
  ruleline = sub(/^Rule /, "R ", line)
  zoneline = sub(/^Zone /, "Z ", line)

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.  The Rule lines themselves are discarded; any other
  # line merely has the "Asia" suffix cut out of the name.
  if (match(line, /[^ ]Asia /)) {
    if (ruleline) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: first strip leading zeros from each numeric
  # component, then drop ":0" components that are not followed by a
  # further ":" component.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Mon/Wed/Fri shrink to one letter;
  # Sun/Tue/Thu/Sat keep two letters.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Sun|Tue|Thu|Sat)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "min", "only" and month names.
  gsub(/ max /, " ma ", line)
  gsub(/ min /, " mi ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Record which rule names are used.  The rule name is field 4 of a
  # Zone line and field 2 of a continuation line; Link and Rule lines
  # reference no rule, so f = 0 selects the (always empty) field[0].
  # A value starting with "-", "+" or a digit is not a rule name.
  f = zoneline ? 4 : linkline || ruleline ? 0 : 2
  r = field[f]
  if (r ~ /^[^-+0-9]/) {
    rule_used[r] = 1
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (zoneline)
    zonename = startdef = field[2]
  else if (linkline)
    zonename = startdef = field[3]
  else if (ruleline)
    zonename = ""
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier definition's first line, then every
      # continuation line (those begin with a sign or digit) after it.
      do {
	output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/)
    }
    # Remember where this definition starts.  This is recorded only on
    # the Zone/Link line itself: updating it on continuation lines too
    # would leave zonedef[] pointing at the last line of a multi-line
    # zone, so a later superseding definition would blank only that
    # last line and leave a truncated duplicate behind.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  output_line[nout++] = make_line(n, field)
}
! 259:
# Blank out every saved "R" (Rule) line whose rule name, taken from its
# second field, has no entry set in rule_used[].  Blanked lines are
# skipped when the saved lines are printed.

function omit_unused_rules( \
			   idx, part)
{
  idx = -1
  while (++idx < nout) {
    split(output_line[idx], part)
    if (part[1] != "R")
      continue
    if (rule_used[part[2]])
      continue
    output_line[idx] = ""
  }
}
! 270:
# Rewrite every saved line so that the rule name it mentions is replaced
# by its abbreviation from rule[].  A rule name with no abbreviation yet
# gets one from gen_rule_name, which is then cached in rule[].

function abbreviate_rule_names( \
			       abbr, f, field, i, n, r)
{
  for (i = 0; i < nout; i++) {
    n = split(output_line[i], field)
    if (!n)
      continue	# blanked-out line

    # Locate the rule-name field: 4 on a Zone line, none (0) on a Link
    # line, and 2 on Rule lines and zone continuation lines.
    if (field[1] == "Z")
      f = 4
    else if (field[1] == "L")
      f = 0
    else
      f = 2

    # Values starting with "-", "+" or a digit (and empty values) are
    # not rule names and are left untouched.
    r = field[f]
    if (r !~ /^[^-+0-9]/)
      continue

    abbr = rule[r]
    if (!abbr)
      rule[r] = abbr = gen_rule_name(r)
    field[f] = abbr
    output_line[i] = make_line(n, field)
  }
}
! 290:
# Print the saved lines output_line[0] .. output_line[nout - 1] in
# order, skipping entries that have been blanked out.

function output_saved_lines( \
			    idx)
{
  idx = 0
  while (idx < nout) {
    if (output_line[idx])
      print output_line[idx]
    idx++
  }
}
! 298:
# Startup: print the header comment lines of the shrunken file and
# preload the rule-abbreviation table.
# The variables deps, version, dataform and redo are not assigned in
# this script; presumably they are supplied by the caller (e.g. through
# awk variable assignments made by tzdb's Makefile).

BEGIN {
  # Files that the output normally depends on.
  default_dep["africa"] = 1
  default_dep["antarctica"] = 1
  default_dep["asia"] = 1
  default_dep["australasia"] = 1
  default_dep["backward"] = 1
  default_dep["etcetera"] = 1
  default_dep["europe"] = 1
  default_dep["factory"] = 1
  default_dep["northamerica"] = 1
  default_dep["southamerica"] = 1
  default_dep["ziguard.awk"] = 1
  default_dep["zishrink.awk"] = 1

  # Output a version string from 'version' and related configuration
  # variables supported by tzdb's Makefile.  If you change the makefile
  # or any other files that affect the output of this script, you
  # should append '-SOMETHING' to the contents of 'version', where
  # SOMETHING identifies what was changed.

  # Compare the actual dependency list with the default one: extra
  # dependencies are listed by name, and each default dependency that
  # is present has its count bumped above 1.
  ndeps = split(deps, dep)
  ddeps = ""
  for (i = 1; i <= ndeps; i++) {
    if (!default_dep[dep[i]])
      ddeps = ddeps " " dep[i]
    else
      default_dep[dep[i]]++
  }

  # A default dependency whose count is still 1 was not in 'deps';
  # list it with a "!" prefix.
  for (d in default_dep)
    if (default_dep[d] == 1)
      ddeps = ddeps " !" d

  # Only non-default settings are mentioned, to keep the header short.
  print "# version", version
  if (dataform != "main")
    print "# dataform", dataform
  if (redo != "posix_right")
    print "# redo " redo
  if (ddeps)
    print "# ddeps" ddeps
  print "# This zic input file is in the public domain."

  prehash_rule_names()
}
! 347:
# Process every input line whose first character other than a tab or
# space is not "#"; blank lines and comment-only lines are skipped.
$0 ~ /^[\t ]*[^#\t ]/ {
  process_input_line($0)
}

# After all input has been read: drop rules that no zone references,
# shorten the remaining rule names, then print the result.
END {
  omit_unused_rules()
  abbreviate_rule_names()
  output_saved_lines()
}