src/share/zoneinfo/zishrink.awk - annotate

Return to zishrink.awk CVS log
Up to [local] / src / share / zoneinfo
Annotation of src/share/zoneinfo/zishrink.awk, Revision 1.1

1.1     ! millert     1: # Convert tzdata source into a smaller version of itself.
        !             2:
        !             3: # Contributed by Paul Eggert.  This file is in the public domain.
        !             4:
        !             5: # This is not a general-purpose converter; it is designed for current tzdata.
        !             6: # 'zic' should treat this script's output as if it were identical to
        !             7: # this script's input.
        !             8:
        !             9: # Reserve abbreviation N for rule NAME; a clash with an earlier name is fatal.
        !            10: function record_hash(n, name)
        !            11: {
        !            12:   if (!used_hashes[n]) {
        !            13:     used_hashes[n] = name
        !            14:     return
        !            15:   }
        !            16:   printf "# ! collision: %s %s\n", used_hashes[n], name
        !            17:   exit 1
        !            18: }
        !            19:
        !            20: # Derive a two-letter abbreviation for rule NAME, reserve it in the
        !            21: # collision table (which aborts on a clash), and return it.
        !            22:
        !            23: function gen_rule_name(name, \
        !            24:                       abbr)
        !            25: {
        !            26:   # The mnemonic is simply the leading two characters of NAME.
        !            27:   abbr = substr(name, 1, 2)
        !            28:   # Debugging aid: printf "# %s = %s\n", abbr, name
        !            29:   record_hash(abbr, name)
        !            30:   return abbr
        !            31: }
        !            32:
        !            33: function prehash_rule_names( \
        !            34:                            name)
        !            35: {
        !            36:   # Preload rule[] with hand-picked abbreviations and reserve each of
        !            37:   # them via record_hash.  Rule names are not part of the tzdb API, so
        !            38:   # shorter ones may be substituted; shortening them consistently from
        !            39:   # one release to the next simplifies comparison of the output.  Even
        !            40:   # so, the 1-letter names below are not standardized and can change
        !            41:   # arbitrarily between releases, as the goal is compression.
        !            42:
        !            43:   # Abbreviating these rule names to one letter saved the most space
        !            44:   # circa 2018e.
        !            45:   rule["Arg"] = "A"
        !            46:   rule["Brazil"] = "B"
        !            47:   rule["Canada"] = "C"
        !            48:   rule["Denmark"] = "D"
        !            49:   rule["EU"] = "E"
        !            50:   rule["France"] = "F"
        !            51:   rule["GB-Eire"] = "G"
        !            52:   rule["Halifax"] = "H"
        !            53:   rule["Italy"] = "I"
        !            54:   rule["Jordan"] = "J"
        !            55:   rule["Egypt"] = "K" # "Kemet" in ancient Egyptian
        !            56:   rule["Libya"] = "L"
        !            57:   rule["Morocco"] = "M"
        !            58:   rule["Neth"] = "N"
        !            59:   rule["Poland"] = "O" # arbitrary
        !            60:   rule["Palestine"] = "P"
        !            61:   rule["Cuba"] = "Q" # Its start sounds like "Q".
        !            62:   rule["Russia"] = "R"
        !            63:   rule["Syria"] = "S"
        !            64:   rule["Turkey"] = "T"
        !            65:   rule["Uruguay"] = "U"
        !            66:   rule["Vincennes"] = "V"
        !            67:   rule["Winn"] = "W"
        !            68:   rule["Mongol"] = "X" # arbitrary
        !            69:   rule["NT_YK"] = "Y"
        !            70:   rule["Zion"] = "Z"
        !            71:   rule["Austria"] = "a"
        !            72:   rule["Belgium"] = "b"
        !            73:   rule["C-Eur"] = "c"
        !            74:   rule["Algeria"] = "d" # country code DZ
        !            75:   rule["E-Eur"] = "e"
        !            76:   rule["Taiwan"] = "f" # Formosa
        !            77:   rule["Greece"] = "g"
        !            78:   rule["Hungary"] = "h"
        !            79:   rule["Iran"] = "i"
        !            80:   rule["StJohns"] = "j"
        !            81:   rule["Chatham"] = "k" # arbitrary
        !            82:   rule["Lebanon"] = "l"
        !            83:   rule["Mexico"] = "m"
        !            84:   rule["Tunisia"] = "n" # country code TN
        !            85:   rule["Moncton"] = "o" # arbitrary
        !            86:   rule["Port"] = "p"
        !            87:   rule["Albania"] = "q" # arbitrary
        !            88:   rule["Regina"] = "r"
        !            89:   rule["Spain"] = "s"
        !            90:   rule["Toronto"] = "t"
        !            91:   rule["US"] = "u"
        !            92:   rule["Louisville"] = "v" # ville
        !            93:   rule["Iceland"] = "w" # arbitrary
        !            94:   rule["Chile"] = "x" # arbitrary
        !            95:   rule["Para"] = "y" # country code PY
        !            96:   rule["Romania"] = "z" # arbitrary
        !            97:   rule["Macau"] = "_" # arbitrary
        !            98:
        !            99:   # Use ISO 3166 alpha-2 country codes for remaining names that are countries.
        !           100:   # This is more systematic, and avoids collisions (e.g., Malta and Moldova).
        !           101:   rule["Armenia"] = "AM"
        !           102:   rule["Aus"] = "AU"
        !           103:   rule["Azer"] = "AZ"
        !           104:   rule["Barb"] = "BB"
        !           105:   rule["Dhaka"] = "BD"
        !           106:   rule["Bulg"] = "BG"
        !           107:   rule["Bahamas"] = "BS"
        !           108:   rule["Belize"] = "BZ"
        !           109:   rule["Swiss"] = "CH"
        !           110:   rule["Cook"] = "CK"
        !           111:   rule["PRC"] = "CN"
        !           112:   rule["Cyprus"] = "CY"
        !           113:   rule["Czech"] = "CZ"
        !           114:   rule["Germany"] = "DE"
        !           115:   rule["DR"] = "DO"
        !           116:   rule["Ecuador"] = "EC"
        !           117:   rule["Finland"] = "FI"
        !           118:   rule["Fiji"] = "FJ"
        !           119:   rule["Falk"] = "FK"
        !           120:   rule["Ghana"] = "GH"
        !           121:   rule["Guat"] = "GT"
        !           122:   rule["Hond"] = "HN"
        !           123:   rule["Haiti"] = "HT"
        !           124:   rule["Eire"] = "IE"
        !           125:   rule["Iraq"] = "IQ"
        !           126:   rule["Japan"] = "JP"
        !           127:   rule["Kyrgyz"] = "KG"
        !           128:   rule["ROK"] = "KR"
        !           129:   rule["Latvia"] = "LV"
        !           130:   rule["Lux"] = "LX" # NOTE(review): ISO 3166 code for Luxembourg is LU; confirm LX is intended
        !           131:   rule["Moldova"] = "MD"
        !           132:   rule["Malta"] = "MT"
        !           133:   rule["Mauritius"] = "MU"
        !           134:   rule["Namibia"] = "NA"
        !           135:   rule["Nic"] = "NI"
        !           136:   rule["Norway"] = "NO"
        !           137:   rule["Peru"] = "PE"
        !           138:   rule["Phil"] = "PH"
        !           139:   rule["Pakistan"] = "PK"
        !           140:   rule["Sudan"] = "SD"
        !           141:   rule["Salv"] = "SV"
        !           142:   rule["Tonga"] = "TO"
        !           143:   rule["Vanuatu"] = "VU"
        !           144:
        !           145:   # Avoid collisions among the generated two-letter names.
        !           146:   rule["Detroit"] = "Dt" # De = Denver
        !           147:   # Reserve every preset abbreviation; record_hash aborts on a duplicate.
        !           148:   for (name in rule) {
        !           149:     record_hash(rule[name], name)
        !           150:   }
        !           151: }
        !           152:
        !           153: function make_line(n, field, \
        !           154:                   idx, joined)
        !           155: {
        !           156:   # Join field[1..n] with single spaces; for n < 2 this is just field[1].
        !           157:   for (joined = field[idx = 1]; idx < n; )
        !           158:     joined = joined " " field[++idx]
        !           159:   return joined
        !           160: }
        !           161:
        !           162: # Shrink the input line LINE and save it for output at END.  Rule lines
        !           163: # for the duplicate "FooAsia" rules are dropped rather than saved.
        !           164: function process_input_line(line, \
        !           165:                            f, field, end, i, n, r, startdef, \
        !           166:                            linkline, ruleline, zoneline)
        !           167: {
        !           168:   # Remove comments, normalize spaces, and append a space to each line.
        !           169:   sub(/#.*/, "", line)
        !           170:   line = line " "
        !           171:   gsub(/[\t ]+/, " ", line)
        !           172:
        !           173:   # Abbreviate keywords; sub() returns 1 on a match, giving the line type.
        !           174:   linkline = sub(/^Link /, "L ", line)
        !           175:   ruleline = sub(/^Rule /, "R ", line)
        !           176:   zoneline = sub(/^Zone /, "Z ", line)
        !           177:
        !           178:   # Replace FooAsia rules with the same rules without "Asia", as they
        !           179:   # are duplicates.
        !           180:   if (match(line, /[^ ]Asia /)) {
        !           181:     if (ruleline) return
        !           182:     line = substr(line, 1, RSTART) substr(line, RSTART + 5)
        !           183:   }
        !           184:
        !           185:   # Abbreviate times: strip leading zeros, then drop ":0" components.
        !           186:   while (match(line, /[: ]0+[0-9]/))
        !           187:     line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
        !           188:   while (match(line, /:0[^:]/))
        !           189:     line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)
        !           190:
        !           191:   # Abbreviate weekday names to one letter, or two where one is ambiguous.
        !           192:   while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
        !           193:     end = RSTART + RLENGTH
        !           194:     line = substr(line, 1, end - 4) substr(line, end - 1)
        !           195:   }
        !           196:   while (match(line, / (last)?(Sun|Tue|Thu|Sat)[ <>]/)) {
        !           197:     end = RSTART + RLENGTH
        !           198:     line = substr(line, 1, end - 3) substr(line, end - 1)
        !           199:   }
        !           200:
        !           201:   # Abbreviate "max", "min", "only" and month names.
        !           202:   gsub(/ max /, " ma ", line)
        !           203:   gsub(/ min /, " mi ", line)
        !           204:   gsub(/ only /, " o ", line)
        !           205:   gsub(/ Jan /, " Ja ", line)
        !           206:   gsub(/ Feb /, " F ", line)
        !           207:   gsub(/ Apr /, " Ap ", line)
        !           208:   gsub(/ Aug /, " Au ", line)
        !           209:   gsub(/ Sep /, " S ", line)
        !           210:   gsub(/ Oct /, " O ", line)
        !           211:   gsub(/ Nov /, " N ", line)
        !           212:   gsub(/ Dec /, " D ", line)
        !           213:
        !           214:   # Strip leading and trailing space.
        !           215:   sub(/^ /, "", line)
        !           216:   sub(/ $/, "", line)
        !           217:
        !           218:   # Remove unnecessary trailing zero fields.
        !           219:   sub(/ 0+$/, "", line)
        !           220:
        !           221:   # Remove unnecessary trailing days-of-month "1".
        !           222:   if (match(line, /[A-Za-z] 1$/))
        !           223:     line = substr(line, 1, RSTART)
        !           224:
        !           225:   # Remove unnecessary trailing " Ja" (for January).
        !           226:   sub(/ Ja$/, "", line)
        !           227:
        !           228:   n = split(line, field)
        !           229:
        !           230:   # Record the rule name, if any, that a Zone or continuation line
        !           231:   # refers to; Link and Rule lines are pointed at the unused field 0.
        !           232:   f = zoneline ? 4 : linkline || ruleline ? 0 : 2
        !           233:   r = field[f]
        !           234:   if (r ~ /^[^-+0-9]/)
        !           235:     rule_used[r] = 1
        !           236:
        !           237:   # If this Zone or Link line redefines a name seen earlier, blank out
        !           238:   # the earlier definition: its first line and the continuation lines
        !           239:   # after it, i.e. the saved lines that begin with a sign or digit.
        !           240:   # zonedef[NAME] is 1 + the index of NAME's Zone or Link line, and is
        !           241:   # set only by such lines; were continuation lines to advance it, the
        !           242:   # scan would start too late and leave most of the old zone in place.
        !           243:   startdef = ""
        !           244:   if (zoneline) startdef = field[2]
        !           245:   else if (linkline) startdef = field[3]
        !           246:   if (startdef) {
        !           247:     i = zonedef[startdef]
        !           248:     if (i) {
        !           249:       do
        !           250:         output_line[i - 1] = ""
        !           251:       while (output_line[i++] ~ /^[-+0-9]/);
        !           252:     }
        !           253:     zonedef[startdef] = nout + 1
        !           254:   }
        !           255:
        !           256:   # Save the line for later output.
        !           257:   output_line[nout++] = make_line(n, field)
        !           258: }
        !           259:
        !           260: function omit_unused_rules( \
        !           261:                           idx, part)
        !           262: {
        !           263:   # Blank each saved Rule line whose name was never marked in rule_used.
        !           264:   for (idx = 0; idx < nout; idx++) {
        !           265:     split(output_line[idx], part)
        !           266:     if (part[1] != "R" || rule_used[part[2]]) continue
        !           267:     output_line[idx] = ""
        !           268:   }
        !           269: }
        !           270:
        !           271: function abbreviate_rule_names( \
        !           272:                               col, part, idx, count, full)
        !           273: {
        !           274:   # Swap the rule name on each saved line for its abbreviation, coining
        !           275:   # one via gen_rule_name the first time a name without a preset is met.
        !           276:   for (idx = 0; idx < nout; idx++) {
        !           277:     count = split(output_line[idx], part)
        !           278:     if (!count) continue
        !           279:     # Rule-name field: 4 on Zone lines, none (0) on Link lines, else 2.
        !           280:     if (part[1] == "Z") col = 4
        !           281:     else if (part[1] == "L") col = 0
        !           282:     else col = 2
        !           283:     full = part[col]
        !           284:     if (full !~ /^[^-+0-9]/) continue
        !           285:     if (!rule[full]) rule[full] = gen_rule_name(full)
        !           286:     part[col] = rule[full]
        !           287:     output_line[idx] = make_line(count, part)
        !           288:   }
        !           289: }
        !           290:
        !           291: function output_saved_lines( \
        !           292:                            idx)
        !           293: {
        !           294:   # Print the saved lines in order, skipping any that were blanked out.
        !           295:   for (idx = 0; idx < nout; idx++)
        !           296:     if (output_line[idx]) print output_line[idx]
        !           297: }
        !           298:
        !           299: BEGIN {
        !           300:   # Inputs that a normal build of the output is derived from.
        !           301:   default_dep["africa"] = 1
        !           302:   default_dep["antarctica"] = 1
        !           303:   default_dep["asia"] = 1
        !           304:   default_dep["australasia"] = 1
        !           305:   default_dep["backward"] = 1
        !           306:   default_dep["etcetera"] = 1
        !           307:   default_dep["europe"] = 1
        !           308:   default_dep["factory"] = 1
        !           309:   default_dep["northamerica"] = 1
        !           310:   default_dep["southamerica"] = 1
        !           311:   default_dep["ziguard.awk"] = 1
        !           312:   default_dep["zishrink.awk"] = 1
        !           313:
        !           314:   # Emit a header identifying this build: the 'version' string, then
        !           315:   # whichever of the tzdb Makefile settings (dataform, redo, deps)
        !           316:   # differ from their defaults.  If you change the makefile or another
        !           317:   # input of this script, append '-SOMETHING' to 'version' to say so.
        !           318:
        !           319:   # Count the files named in 'deps': a default file's count rises
        !           320:   # above 1, and any other file is listed as-is.
        !           321:   ddeps = ""
        !           322:   ndeps = split(deps, dep)
        !           323:   for (i = 1; i <= ndeps; i++) {
        !           324:     if (!default_dep[dep[i]])
        !           325:       ddeps = ddeps " " dep[i]
        !           326:     else
        !           327:       default_dep[dep[i]]++
        !           328:   }
        !           329:   # A default still at 1 was absent from 'deps'; list it as "!file".
        !           330:   for (d in default_dep)
        !           331:     if (default_dep[d] == 1)
        !           332:       ddeps = ddeps " !" d
        !           333:
        !           334:   # Write the header; settings at their default values are left out.
        !           335:   print "# version", version
        !           336:   if (dataform != "main")
        !           337:     print "# dataform", dataform
        !           338:   if (redo != "posix_right")
        !           339:     print "# redo " redo
        !           340:   if (ddeps)
        !           341:     print "# ddeps" ddeps
        !           342:   print "# This zic input file is in the public domain."
        !           343:
        !           344:   # Reserve the preset rule abbreviations before any input is read.
        !           345:   prehash_rule_names()
        !           346: }
        !           347:
        !           348: # Save every input line that holds something besides white space
        !           349: # and comments; all other lines are dropped.
        !           350: $0 ~ /^[\t ]*[^#\t ]/ { process_input_line($0) }
        !           351:
        !           352: END {
        !           353:   # Drop unused rules first, since rule_used is keyed by the full rule
        !           354:   # names; then abbreviate the names that remain and print the result.
        !           355:   omit_unused_rules(); abbreviate_rule_names(); output_saved_lines()
        !           356: }