#!/bin/ksh
#
# *-ja-holidays_csv.sh - writes Japanese holidays in CSV
#
# Copyright (C) 2020 Taiji Yamada
#
# usage:
#   $ ./$0 | sort | less
#   $ ./$0 | sort | pbcopy
#
# The calendar source is selected by the name the script is invoked under:
#   apple-ja-holidays_csv.sh   -> Apple calendar
#   google-ja-holidays_csv.sh  -> Google calendar
#   any other name             -> Mozilla calendar
#
# Output: one record per VEVENT in the form "YYYY/MM/DD,summary", each
# terminated by CRLF.  A summary containing a comma is wrapped in double
# quotes, with embedded double quotes doubled.
#
# Exit status: curl's status when the download fails, otherwise the status
# of the sed conversion.
#
type="$(basename "$0" -ja-holidays_csv.sh)"
case "$type" in
  apple)
    URL_HOLIDAYS_ICS='http://files.apple.com/calendars/Japanese32Holidays.ics'
    ;;
  google)
    URL_HOLIDAYS_ICS='https://calendar.google.com/calendar/ical/ja.japanese%23holiday%40group.v.calendar.google.com/public/basic.ics'
    ;;
  *)
    URL_HOLIDAYS_ICS='https://www.mozilla.org/projects/calendar/caldata/JapanHolidays.ics'
    ;;
esac

#
# References:
#
# [1] https://qiita.com/richmikan@github/items/9090407e3ab9cd3e80b2
#     For some reason it combines GNU Awk with sed on top; I find that
#     inconsistent and do not like it.
#
# [2] https://qiita.com/amanoese/items/7db079377d6ebc215f60
#     A simple combination of Awk, Perl and egrep, which is inefficient.
#
# [3] https://www.magtranetwork.com/aws/shell_script_get_holidays.html
#     A simple combination of grep, cat and multi-stage sed, which is
#     inefficient.
#
# (*) Most of these assume that DTSTART appears before SUMMARY, although
#     the opposite order should not be an error according to the iCal
#     specification.  Only the sed program in this script and [2] cope
#     with that.
#

# The sed program needs a literal carriage return and a literal tab.  They
# are generated here instead of being embedded as invisible characters, so
# the script survives editors and transfers that normalize whitespace.
cr=$(printf '\r')
tab=$(printf '\t')

# ics_to_csv - read iCalendar text on stdin, write CSV records to stdout.
#
# Works for the apple, google and mozilla calendars, and for summaries
# containing commas.
#
# How the sed program works, command by command:
#   s/<CR>$//             drop a trailing CR, so CRLF input is handled as LF
#   /^BEGIN:VEVENT$/,/^END:VEVENT$/{    only for lines inside an event:
#     /^BEGIN:VEVENT$/h   start a record: replace the hold space with it
#     /^DTSTART/H         append lines starting with DTSTART to the hold space
#     /^SUMMARY/H         likewise for lines starting with SUMMARY
#     /^END:VEVENT$/{     at the end of the event:
#       H                 append the END line as well
#       g                 copy the collected lines into the pattern space
#       s/<TAB>/ /g       turn any tab into a space; a stray tab would break
#                         the next steps, which use tab as the separator
#       s/\n/<TAB>/g      join the collected lines with tabs
#       h                 keep that one-line record in the hold space
#       s/..DTSTART../    reduce the pattern space to the date, rewritten
#                         from YYYYMMDD to YYYY/MM/DD (e.g. 2020/01/01)
#       x                 swap: pattern space = record, hold space = date
#       s/..SUMMARY../    reduce the pattern space to the summary text
#       /,/{ .. }         if the summary contains a comma it would break the
#                         CSV, so double every double quote and then wrap the
#                         summary in double quotes
#       H                 hold space is now "date<LF>summary"
#       g                 copy it back into the pattern space
#       s/\n/,/g          join date and summary with a comma
#       s/$/<CR>/         end the record with CR; the LF added by p makes CRLF
#       p                 print the record
# Because DTSTART and SUMMARY are both located by pattern inside the joined
# record, their relative order within the event does not matter.
ics_to_csv() {
  sed -n "s/${cr}\$//
/^BEGIN:VEVENT\$/,/^END:VEVENT\$/{
/^BEGIN:VEVENT\$/h
/^DTSTART/H
/^SUMMARY/H
/^END:VEVENT\$/{
H
g
s/${tab}/ /g
s/\n/${tab}/g
h
s/^.*${tab}DTSTART;VALUE=DATE.*:\([0-9][0-9][0-9][0-9]\)\([0-9][0-9]\)\([0-9][0-9]\)${tab}.*\$/\1\/\2\/\3/
x
s/^.*${tab}SUMMARY:\([^${tab}]*\)${tab}.*\$/\1/
/,/{
s/\"/\"\"/g
s/^\(.*\)\$/\"\1\"/
}
H
g
s/\n/,/g
s/\$/${cr}/
p
}
}
"
}

# Download first and convert afterwards, so that a failed download is
# reported and reflected in the exit status instead of silently producing
# empty output.  -f fails on HTTP errors, -s hides the progress meter and
# -S still prints error messages.
ics=$(curl -fsS "$URL_HOLIDAYS_ICS") || {
  status=$?
  printf '%s: failed to download %s\n' "${0##*/}" "$URL_HOLIDAYS_ICS" >&2
  exit "$status"
}

printf '%s\n' "$ics" | ics_to_csv
exit $?

# ---------------------------------------------------------------------------
# Alternative pipelines, kept for reference only; they are never executed.
# NOTE(review): the numbering below may not line up with the reference list
# above (the egrep/awk/perl pipeline fits the description given for [2]);
# confirm against the cited articles.
#
# [1] only for google iCal
#   curl -s "$URL_HOLIDAYS_ICS" | egrep '^DTSTART|^SUMMARY' | awk -F: '{print $2}' | perl -pe 's/(?<=\d)\r\n/ /m'
#
# [2] for apple, google and mozilla iCal
#   curl -s "$URL_HOLIDAYS_ICS" | sed -n '/^BEGIN:VEVENT/,/^END:VEVENT/p' | awk '
#   /^BEGIN:VEVENT/{
#     rec++;
#   }
#   match($0,/^DTSTART.*DATE.*:/){ # minor fix
#     print rec,1,substr($0,RLENGTH+1);
#   }
#   match($0,/^SUMMARY:/){
#     s=substr($0,RLENGTH+1);
#     gsub(/ /,"_",s);
#     print rec,2,s;
#   }' | sort -k1n,1 -k2n,2 | awk '
#   $2==1{printf("%d ",$3);}
#   $2==2{print $3;}
#   '
#
# [3] for apple and google iCal
#   curl -s "$URL_HOLIDAYS_ICS" | grep -e "DTSTART;VALUE=DATE" -e SUMMARY -e "BEGIN:VEVENT" -e "END:VEVENT" | tr -d '\r\n' | sed 's/END:VEVENT/\n/g' | sed 's/BEGIN:VEVENT//g' | sed 's/:/ /g' | sed 's/SUMMARY/ /g' | awk '{print $2,$NF}' | awk '{if($1 ~/^[0-9]+$/){print $1,$NF}else{print $NF,$1}}'