scripts/elhuyar.sh
(Deskargatu)
#!/bin/bash
export LANGUAGE=es_ES.UTF-8
export LANG=es_ES.UTF-8
export LC_ALL=es_ES.UTF-8
SEARCH=$@
# Main URL to be parsed
URL="https://hiztegiak.elhuyar.eus/eu_es/$SEARCH"
CONTENT=$(curl $COOKIE -s "$URL")
if [ "$0" != "$BASH_SOURCE" ]; then
_autocomplete() {
while read prop
do
COMPREPLY+=($prop)
done <<< $(curl -s "https://hiztegiak.elhuyar.eus/autocomplete/?term=$2&hizkuntza=eu" | jq '.[] .label' |perl -pe "s/.*sarrera'>(.*?)<.*/\$1/g;")
return 0
}
complete -F "_autocomplete" "elhuyar"
return
fi
# Parse every lehena field, and display result
while read -r line
do
# remove html tags from line or html special tags
echo $line | perl -pe 's/<p/\n<p/g; s/<[^>]+>|&#\d+;//g;'
# Split data by html class .lehena
done < <(echo $CONTENT | \
perl -pe 's/[\r\n]+//g; s/(<p class="lehena")/\n$1/g; s/(<\/li>)/$1\n/g;' | \
grep '<p class="lehena"'
);
echo -e "\n\Beste hiztegi unitateak:"
echo $CONTENT | \
perl -pe 's/^.*?(erakutsi_sarrera_berria\(.*?a>)/\n$1/g;' | \
grep erakutsi_sarrera_berria | \
perl -pe 's/(erakutsi_sarrera_berria.*?a>).*/$1/g;' | \
(
while read line
do
line=$(echo $line| tr "'" '"')
NAME=$(echo $line | perl -pe 's/erakutsi_sarrera_berria[^>]+>(.*)<.*/$1/g;')
echo " - $NAME"
done
)