44uk
9/22/2013 - 5:25 AM

IPA 情報処理技術者試験過去問のURLリスト作成

IPA 情報処理技術者試験過去問のURLリスト作成

#!/bin/sh -x

# Abort early when wget is not available; the URL harvesting step further
# down cannot run without it. `command -v` is the POSIX-specified way to look
# up a command and reports the result through its exit status, so no output
# parsing (as with `which`) is needed.
if ! command -v wget >/dev/null 2>&1; then
  # Diagnostics belong on stderr so they never mix with regular output.
  echo "Required wget." >&2
  exit 1
fi

# Create the output directory for the per-exam URL lists. -p makes this a
# no-op (instead of an error) when the directory already exists from an
# earlier run.
mkdir -p exams

# Extract the PDF URLs.
# wget walks the index page recursively in --spider mode and its log (merged
# from stderr into the pipe by 2>&1) is filtered: for every log line that
# contains a literal ".pdf", the 4th whitespace-separated field is taken as
# the URL. The result is sorted, de-duplicated and written to all.txt.
# NOTE(review): relies on the URL being field 4 of wget's -nv log lines;
# confirm against the wget version in use.
wget --spider -nd -nc -nv -np -r -Apdf \
  http://www.jitec.ipa.go.jp/1_04hanni_sukiru/_index_hanni_skill.html 2>&1 \
  | awk '/\.pdf/ { print $4 }' \
  | sort -u > all.txt

#######################################
# Split a URL list into one file per exam code.
#
# Every input line containing "_<code>_" is written to
# <out_dir>/<code>_urls.txt. A line that contains several codes is written to
# each matching file; a line that contains none is dropped. Files are only
# created for codes that match at least one line.
#
# Each output file is truncated the first time it is written during a run,
# so running the script again replaces the lists instead of appending
# duplicate URLs to them.
#
# NOTE(review): "_pm_" would also match file names that use "pm" as a
# session marker (e.g. "..._fe_pm_qs.pdf"), if such names occur in the list;
# confirm whether that overlap is intended.
#
# Arguments: $1 - URL list to read      (default: all.txt)
#            $2 - output directory      (default: exams)
# Outputs:   error message on stderr when the URL list cannot be read
# Returns:   0 on success, non-zero on error
#######################################
split_urls_by_exam() (
  # The function body is a subshell, so these variables do not leak.
  url_list=${1:-all.txt}
  out_dir=${2:-exams}

  if [ ! -r "$url_list" ]; then
    echo "Cannot read URL list: $url_list" >&2
    exit 1
  fi
  mkdir -p "$out_dir" || exit 1

  # Single pass over the list: test each line against every exam code.
  awk -v dir="$out_dir" '
    BEGIN {
      n = split("ad ae an anpmae ap au db es fe haiten ip koudo nw pm " \
                "sa sc sd sm st su sv sw", codes, " ")
    }
    {
      for (i = 1; i <= n; i++) {
        if (index($0, "_" codes[i] "_")) {
          # ">" truncates on the first write only; later writes in the same
          # awk run go to the already-open file.
          print > (dir "/" codes[i] "_urls.txt")
        }
      }
    }
  ' "$url_list"
)

# Split the URLs per exam.
split_urls_by_exam all.txt exams