#! /bin/sh
#
# hrefs - print the value of every href="..." attribute found in the HTML
# read from stdin, one value per output line.
#
# usage: hrefs [PREFIX]
#   no argument   print the href values unchanged
#   PREFIX        additionally prepend PREFIX to every value that does not
#                 start with a URI scheme (i.e. to relative hrefs)
#   otherwise     print "bad command line: ..." to stderr and exit 23
#
# Provenance: recovered from a two-commit patch series that had been
# flattened onto a single line.
#
#   commit 113d54f5243f68be7620b22b5651400d07af36d2
#   From: tv   Date: Tue, 30 Aug 2011 12:37:14 +0200
#   Subject: util hrefs: initial commit
#     "The hrefs util extracts hrefs from html-stdin... we could use query
#      for this kind of extraction but hrefs is older and works already as
#      fast as light.. not^_^"
#     util/bin/hrefs | 20 insertions(+), new file mode 100755
#
#   commit 2ee8173625ffe8683fb18384a96f7b44b5ddc8fb
#   From: tv   Date: Tue, 6 Sep 2011 22:48:35 +0200
#   Subject: //util dict.leo.org: initial import
#     util/bin/dict.leo.org | 87 insertions(+), new file mode 100755
#     (payload truncated in this copy; see the fragment at end of file)

# Emit the contents of each double-quoted, non-empty href="..." attribute.
# Stdin:  HTML text.   Stdout: one href value per line, in input order.
# Only POSIX sed syntax is used: "[^"][^"]*" instead of "\+", and a
# backslash-newline in the replacement instead of "\n".
_hrefs() {
  # Pass 1: put every href="..." on a line of its own (only changed lines
  # are printed).  Pass 2: keep those lines and strip the href="" wrapper.
  sed -n 's/href="\([^"][^"]*\)"/\
&\
/gp' |
    sed -n 's/^href="\([^"][^"]*\)"$/\1/p'
}

# Prepend $1 to every stdin line that does not begin with a URI scheme
# ("http:", "https:", "mailto:", ...); lines with a scheme pass unchanged.
# Arguments: $1 - prefix, taken literally.
# The prefix travels through the environment rather than being spliced
# into the program text, so characters such as "&", "\", "|" or "/" in it
# cannot be misread as syntax.
_add_prefix_to_relative_hrefs() {
  HREFS_PREFIX=$1 awk '
    /^[A-Za-z][A-Za-z0-9+.-]*:/ { print; next }
    { print ENVIRON["HREFS_PREFIX"] $0 }
  '
}

# Dispatch on the number of command line arguments (see usage above).
_main() {
  case $# in
    (0) _hrefs ;;
    (1) _hrefs | _add_prefix_to_relative_hrefs "$1" ;;
    (*)
      printf '%s\n' "bad command line: $0 $*" >&2
      exit 23
      ;;
  esac
}

set -euf
_main "$@"
#### end of file.

# ---------------------------------------------------------------------------
# Fragment of util/bin/dict.leo.org from the second commit, kept verbatim.
# It is cut off in the middle of its sed program, and the HTML tags inside
# the sed addresses were lost, so it is NOT runnable as shown.
#
#   #! /bin/sh
#   #### dict.leo.org version 2.0 beta 1
#   set -euf
#
#   cache_dir=/tmp/dict.leo.org/
#   file="$cache_dir$*"
#
#   url="http://dict.leo.org/?$*"
#
#   # TODO check sanity of filename
#
#   if test -d "$cache_dir" ; then
#     test -e "$file" || { curl --silent "$url" | tee "$file" ; }
#   else
#     curl --silent "$url"
#   fi |
#   sed 's/>\( *.\)/>\n\1/g' | sed -rn '
#     s/[[:space:]]/ /g
#     //,/<\/td>/b
#     //,/^<\/td>/b
#     //,/^ <\/table>/b
#     //,/<\/table>/b
#     /
# ---------------------------------------------------------------------------