stl-statilque-litterateur/var/lib/stl/scripts/check__article

718 lines
19 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Name: Statique Littérateur
# Type: Article Checkers
# file: check__article
# Folder: /var/lib/stl/scripts/
# By echolib (XMPP: im@echolib.re)
# License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007
#------------
# funny stats
#------------
# lines: 717
# functions: 20
#-----------------------------------------------------------------------
#**********************************************************************
#======================================================================
# Argument check
# $1: check
# $2: arguments
#======================================================================
check__OPTIONS() {
  # Dispatch every argument that follows $1:
  #   all    -> domain__get, check_all=true, update__database
  #   -F     -> check_force=true
  #   *.stl  -> article__hash "<argument>"
  #   other  -> noarg "<argument>" "add (ARTICLE)"
  # After the loop, checkers is run on $uri_article when that variable is
  # non-empty (it is not assigned in this function; presumably article__hash
  # sets it - TODO confirm).
  if [[ -z "$2" ]]; then
    noarg "Cannot be empty"
  fi

  # $2 is always the argument being examined; shift brings in the next one.
  until [[ -z "$2" ]]; do
    case "$2" in
      all)
        domain__get
        check_all=true
        update__database
        ;;
      -F)
        check_force=true
        ;;
      *.stl)
        article__hash "$2"
        ;;
      *)
        noarg "$2" "add (ARTICLE)"
        ;;
    esac
    shift
  done

  [[ -n "$uri_article" ]] && checkers "$uri_article"
}
#======================================================================
# Get sub uri, split article (head, post)
# Start modules to check article
# $1: article
#======================================================================
checkers() {
  # Run every checker on one article.
  # $1: article file, handed to split_article.
  # Globals read: article_db, check_force, db_exists, need_check,
  #   uri_article, domain_dir_articles, article_tmp_head, article_tmp_post,
  #   stl_error.
  # Globals written: article_name, uri_folder, article_dir_srv,
  #   sub_genuri_srv, article_tmp_db.
  # Exits the shell when the article is reported as already checked and -F
  # was not given.
  local slashes i

  # Check if article DB exists, compare hashes
  if__article_db "$article_db"
  if ! [[ $check_force ]] && [[ $db_exists ]] && ! [[ $need_check ]]; then
    printf '%s\n%s\n%s\n%s\n' \
      "# Article: $uri_article" \
      "  Database: $db_file (type stl db $this_article) for content" \
      "  Hash: $article_hash" \
      "  Status: Already checked (Use -F to force check again)"
    exit
  fi

  # Set article uri.
  # Strip only the last path component / only a leading articles dir: a
  # pattern substitution would treat glob characters in the names as
  # patterns and could remove a match in the middle of the path.
  article_name=${uri_article##*/}
  uri_folder=${uri_article%/*} # No ending / for checkers
  article_dir_srv=${uri_folder#"$domain_dir_articles"}

  # HTML sub uri: one "../" per "/" left in article_dir_srv, "./" if none
  slashes=${article_dir_srv//[^\/]/}
  if [[ -z $slashes ]]; then
    sub_genuri_srv="./"
  else
    sub_genuri_srv=""
    for ((i = 0; i < ${#slashes}; i++)); do
      sub_genuri_srv+="../"
    done
  fi

  split_article "$1"         # Split metas & content
  [[ $stl_error ]] && return # Cancel if no separator

  # Create TMP file for Db, with URI files
  article_tmp_db=$(mktemp)

  # Checkers
  check__metas "$article_tmp_head" # Needed metas
  check__titles "$article_tmp_post"
  check__paragraphs "$article_tmp_post"
  check__quotes "$article_tmp_post"
  check__lists "$article_tmp_post"
  check__icodes "$article_tmp_post"
  check__strongs "$article_tmp_post"
  check__bolds "$article_tmp_post"
  check__emphasis "$article_tmp_post"
  check__cross "$article_tmp_post"
  check__dels "$article_tmp_post"

  ! [[ $stl_error ]] \
    && log__add -i -C -A "Content Ok"

  # Statistics ; manage__stats
  stats__words "$article_tmp_post"

  # NOTE(review): db__print is called unconditionally; presumably it tests
  # stl_error itself before writing - confirm in its definition.
  db__print

  # No more needed split files (created in split_article())
  rm -f "$article_tmp_head"
  rm -f "$article_tmp_post"
  rm -f "$article_tmp_db"
}
#----------------------------------------------------------------------
# Modules that check article contents
#----------------------------------------------------------------------
#======================================================================
# Check NEEDED metas. Called from checkers()
# $1: $article_tmp_head
#======================================================================
check__metas() {
  # Read the needed metas (date, title, about, author, tags) from the head
  # file, validate them, then pass the optional metas to get__content_metas.
  # $1: $article_tmp_head
  # Globals written: article_Date, article_Title, article_About,
  #   article_Author, article_Tags, date_Y, date_M, date_D, date_M0, date_D0,
  #   false_time, date_epoch, stat_images, stat_link_files, stat_codes,
  #   stat_bruts.
  # Every problem is reported through log__add.

  # Function to check NEEDED metas and their content
  # Find line with marker, stdout without marker
  # $1: marker
  # $2: $article_tmp_head
  # --------------------
  check__needed_meta() {
    while read -r "content"; do
      awk -F"$1" '{print $2}' <<<"$content"
      break # Only the first one will be registered
    done < <(grep "$1" "$2")
  }

  article_Date=$(check__needed_meta "^date: " "$1")
  article_Title=$(check__needed_meta "^title: " "$1")
  article_About=$(check__needed_meta "^about: " "$1")
  article_Author=$(check__needed_meta "^author: " "$1")
  article_Tags=$(check__needed_meta "^tags: " "$1")

  # Analyse date syntax, month & day
  case "$article_Date" in
    '')
      log__add -e -C -Mda "No date registred. Use: YYYY-MM-DD"
      ;;
    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9])
      IFS=- read -r date_Y date_M date_D <<<"$article_Date"

      # Reset the leading-zero flags on every call so a previous article
      # cannot leak its state into this one.
      date_M0=''
      date_D0=''
      [[ $date_M == 0* ]] && date_M0=true
      [[ $date_D == 0* ]] && date_D0=true

      # 10# forces base 10: "08" and "09" are not valid octal numbers.
      # date_M / date_D keep their two-digit form for the db.
      if ((10#$date_M > 12 || 10#$date_M < 1)); then
        log__add -e -C -Mda "Mismatch Month: '${date_M#0}'"
      fi
      if ((10#$date_D > 31 || 10#$date_D < 1)); then
        log__add -e -C -Mda "Mismatch Day: '${date_D#0}'"
      fi

      # timestamp date to db (for sidebar sort); the current time of day
      # is appended to the article date.
      false_time=$(date +%T)
      date_epoch=$(date +%s -u -d "$date_Y-$date_M-$date_D $false_time")
      ;;
    *)
      log__add -e -C -Mda "Mismatch date. Use: YYYY-MM-DD"
      ;;
  esac

  [[ "$article_Title" ]] || log__add -e -C -Mti "No title registred"
  [[ "$article_About" ]] || log__add -e -C -Mab "No about registred"
  [[ "$article_Author" ]] || log__add -e -C -Mau "No author registred"
  [[ "$article_Tags" ]] || log__add -e -C -Mta "No tags registred"
  [[ $stl_error ]] || log__add -i -C -M "Needed Metas Ok"

  # Init stats for files
  stat_images=0
  stat_link_files=0
  stat_codes=0
  stat_bruts=0

  # Analyse optional metas content. Function is in manage_articles
  get__content_metas "^link: " "$1" check
  get__content_metas "^file: " "$1" check
  get__content_metas "^image: " "$1" check
  get__content_metas "^brut: " "$1" check
  get__content_metas "^code: " "$1" check
  get__content_metas "^abbr: " "$1" check

  ! [[ $stl_error ]] \
    && log__add -i -C -M "Optional Metas Ok"
}
#======================================================================
# Check content metas LINK
# Called from get__content_metas() in manage__articles
#======================================================================
check__link() {
  # Validate one "link" meta.
  # Globals read: meta, header_f1, header_f2, header_f3, ln,
  #   article_tmp_post. Global written: stat_links (incremented).
  # Nothing is done when $meta is empty.
  if [[ -z $meta ]]; then
    return
  fi

  # Needed meta fields
  if [[ -z "$header_f1" || -z "$header_f2" ]]; then
    log__add -e -C -Mlt "Line $ln. Mismatch: 'NAME : URL : ALT-TEXT'"
    return
  fi

  # Accessibility alt-text: only a warning, the check goes on
  [[ -n "$header_f3" ]] \
    || log__add -w -C -Mlt "Line $ln. Accessibility: please, use ALT-TEXT"

  # Needed article content: "_NAME" must appear in the post
  if ! grep -q "_$header_f1" "$article_tmp_post"; then
    log__add -e -C -A "Line $ln. No content: '_$header_f1'"
    return
  fi

  ((stat_links++))
}
#======================================================================
# Common function to check if file exists
# for __image, __precode, __brut __link_file
# $1: generic_file (with @ in FILENAME) | article_file
# $2: Log_f3
#======================================================================
check__file_exist() {
  # Resolve the file named in $header_f2 and check that it exists.
  # $1: "article_file"  -> looked up in $uri_folder
  #     "generic_file"  -> the "@" is removed from $header_f2, then looked
  #                        up in $domain_dir_images when $2 is -Mim,
  #                        otherwise in $domain_dir_files
  # $2: option forwarded to log__add (e.g. -Mim, -Mlf, -Mpc, -Mbr)
  # Globals written: uri_file (always), header_f2 (generic_file only),
  #   this_article (set to the missing path when the file is not found).
  case "$1" in
    "article_file")
      uri_file="$uri_folder/$header_f2"
      ;;
    "generic_file")
      header_f2=${header_f2/@/}
      case "$2" in
        "-Mim") uri_file="$domain_dir_images/$header_f2" ;;
        *) uri_file="$domain_dir_files/$header_f2" ;;
      esac
      ;;
    *)
      return
      ;;
  esac

  if ! [[ -f "$uri_file" ]]; then
    # Report the path that was actually tested (the generic branch used to
    # report the images directory even for non-image files).
    this_article="$uri_file"
    log__add -e -C "$2" \
      "Line $ln. File not found: '$header_f2'"
  fi
}
#======================================================================
# Check content metas LINK FILE
# Called from get__content_metas() in manage__articles
#======================================================================
check__link_file() {
  # Validate one "file" meta and register the file in the temporary db.
  # Globals read: meta, header_f1, header_f2, header_f3, ln,
  #   article_tmp_post, article_tmp_db, stl_error, uri_file.
  # Global written: stat_link_files (incremented).
  # Nothing is done when $meta is empty.
  if [[ -z $meta ]]; then
    return
  fi

  # Needed meta fields
  if [[ -z "$header_f1" || -z "$header_f2" ]]; then
    log__add -e -C -Mlf "Line $ln. Mismatch: 'NAME : FILENAME : ALT-TEXT'"
    return
  fi

  # Accessibility alt-text: only a warning
  [[ -n "$header_f3" ]] \
    || log__add -w -C -Mlf "Line $ln. Accessibility: please, use ALT-TEXT"

  # Needed article content: "__NAME" must appear in the post
  if ! grep -q "__$header_f1" "$article_tmp_post"; then
    log__add -e -C -A "Line $ln. No content: '__$header_f1'"
    return
  fi

  # File exists: a leading @ selects the generic lookup
  if [[ "$header_f2" == "@"* ]]; then
    check__file_exist "generic_file" "-Mlf"
  else
    check__file_exist "article_file" "-Mlf"
  fi
  if [[ $stl_error ]]; then
    return
  fi

  # Count, then write to tmp DB
  ((stat_link_files++))
  printf '%s\n' "File_$stat_link_files='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas IMAGE
# Called from get__content_metas() in manage__articles
#======================================================================
check__image() {
  # Validate one "image" meta, check its placement line in the post and
  # register the file in the temporary db.
  # Globals read: meta, header_f1, header_f2, header_f3, ln, article_begin,
  #   article_tmp_post, article_tmp_db, stl_error, uri_file.
  # Globals written: image_ln_syntax, image_ln, image_syntax,
  #   stat_images (incremented).
  # Nothing is done when $meta is empty.
  local match

  if [[ -z $meta ]]; then
    return
  fi

  # Needed meta fields
  if ! [[ "$header_f1" && "$header_f2" && "$header_f3" ]]; then
    log__add -e -C -Mim \
      "Line $ln. Mismatch: 'NBR : FILENAME : ALT-TEXT'"
    return
  fi

  # Needed article content.
  # "_image:1" is also a prefix of "_image:10", so grep may return several
  # lines. Prefer the first well-formed placement ("_image:NBR" alone or
  # followed by a space); otherwise keep the first match so that a single
  # malformed line is still reported as a syntax error.
  image_ln_syntax=''
  while IFS= read -r match; do
    [[ $image_ln_syntax ]] || image_ln_syntax=$match
    case "${match#*:}" in
      "_image:$header_f1" | "_image:$header_f1 "*)
        image_ln_syntax=$match
        break
        ;;
    esac
  done < <(grep -n "_image:$header_f1" "$article_tmp_post")

  image_ln=${image_ln_syntax%%:*}     # grep -n line number
  image_syntax=${image_ln_syntax#*:}  # matched line without the number
  image_ln=$((image_ln + article_begin - 1))

  if ! [[ "$image_syntax" ]]; then
    log__add -e -C -A \
      "Line $ln. No content: '_image:$header_f1'"
    return
  fi

  case "$image_syntax" in
    "_image:$header_f1") true ;;    # No argument
    "_image:$header_f1 "*) true ;;  # With arguments
    "_image:$header_f1"*)
      log__add -e -C -A \
        "Line $image_ln. Syntax error: $image_syntax"
      return
      ;;
  esac

  # File exists: a leading @ selects the generic lookup
  case "$header_f2" in
    "@"*) check__file_exist "generic_file" "-Mim" ;;
    *) check__file_exist "article_file" "-Mim" ;;
  esac
  [[ $stl_error ]] && return

  # Count
  ((stat_images++))

  # Write to tmp DB
  echo "Image_$stat_images='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas CODE (with <pre>)
# Called from get__content_metas() in manage__articles
#======================================================================
check__precode() {
  # Validate one "code" meta and register the file in the temporary db.
  # Globals read: meta, header_f1, header_f2, header_f3, ln,
  #   article_tmp_post, article_tmp_db, stl_error, uri_file.
  # Global written: stat_codes (incremented).
  # Nothing is done when $meta is empty.
  if [[ -z $meta ]]; then
    return
  fi

  # Needed meta fields: all three are mandatory
  if [[ -z "$header_f1" || -z "$header_f2" || -z "$header_f3" ]]; then
    log__add -e -C -Mpc "Line $ln. Mismatch: 'NBR : FILENAME : ALT-TEXT'"
    return
  fi

  # Needed article content: "_code:NBR" must appear in the post
  if ! grep -q "_code:$header_f1" "$article_tmp_post"; then
    log__add -e -C -A "Line $ln. No content: '_code:$header_f1'"
    return
  fi

  # File exists: a leading @ selects the generic lookup
  if [[ "$header_f2" == "@"* ]]; then
    check__file_exist "generic_file" "-Mpc"
  else
    check__file_exist "article_file" "-Mpc"
  fi
  if [[ $stl_error ]]; then
    return
  fi

  # Count, then write to tmp DB
  ((stat_codes++))
  printf '%s\n' "Code_$stat_codes='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas BRUT
# Called from get__content_metas() in manage__articles
#======================================================================
check__brut() {
  # Validate one "brut" meta and register the file in the temporary db.
  # Globals read: meta, header_f1, header_f2, ln, article_tmp_post,
  #   article_tmp_db, stl_error, uri_file.
  # Global written: stat_bruts (incremented).
  # Nothing is done when $meta is empty.
  if [[ -z $meta ]]; then
    return
  fi

  # Needed meta fields
  if [[ -z "$header_f1" || -z "$header_f2" ]]; then
    log__add -e -C -Mbr "Line $ln. Mismatch: 'NBR : FILENAME'"
    return
  fi

  # Needed article content: "_brut:NBR" must appear in the post
  if ! grep -q "_brut:$header_f1" "$article_tmp_post"; then
    log__add -e -C -A "Line $ln. No content: '_brut:$header_f1'"
    return
  fi

  # File exists: a leading @ selects the generic lookup
  if [[ "$header_f2" == "@"* ]]; then
    check__file_exist "generic_file" "-Mbr"
  else
    check__file_exist "article_file" "-Mbr"
  fi
  if [[ $stl_error ]]; then
    return
  fi

  # Count, then write to tmp DB
  ((stat_bruts++))
  printf '%s\n' "Brut_$stat_bruts='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas ABBR
# Called from get__content_metas() in manage__articles
#======================================================================
check__abbr() {
  # Validate one "abbr" meta.
  # Globals read: meta, header_f1, header_f2, ln, article_tmp_post.
  # Returns 1 without logging when $meta is empty.
  if ! [[ $meta ]]; then
    return 1
  fi

  # Needed meta fields
  if [[ -z "$header_f1" || -z "$header_f2" ]]; then
    log__add -e -C -Mab "Line $ln. Mismatch: 'SHORT : Long : option'"
    return
  fi

  # Meta field 1 must be in CAPS (equal to its own upper-cased form)
  if [[ $header_f1 != ${header_f1^^} ]]; then
    log__add -e -C -Mab "Line $ln. $header_f1 must be in CAPITAL"
    return
  fi

  # Needed article content: the short form next to a space
  if ! grep -q -E " $header_f1|$header_f1 " "$article_tmp_post"; then
    log__add -e -C -A "Line $ln. No content (abbr): '$header_f1'"
    return
  fi
}
#======================================================================
# Check content article : TITLES (#1-6)
# $1: $article_tmp_post
#======================================================================
check__titles() {
  # Check the title lines of the post: lines starting with '#' that do not
  # contain "# ". The second character must be a digit 1-6 and the text
  # from the fourth character on must not be empty.
  # $1: $article_tmp_post
  # Globals written: stat_titles (valid titles), ln, content, tc, tn.
  stat_titles=0

  while IFS=: read -r "ln" "content"; do
    article__line # count new ln

    tc=${content:3} # Title content
    [[ -n "$tc" ]] \
      || log__add -e -C -A "Line $ln. Title is empty"

    tn=${content:1:1} # Title number
    if [[ "$tn" == [1-6] ]]; then
      ((stat_titles++))
    else
      log__add -e -C -A "Line $ln. Title mismatch '$tn' ; Use 1-6"
    fi
  done < <(grep -n '^#' "$1" | grep -v '# ')
}
#======================================================================
# Check content article : PARAGRAPHS
# $1: $article_tmp_post
#======================================================================
check__paragraphs() {
  # Count paragraph openers (a line that is "(" or starts with "( ") and
  # closers (a line that is ")") and verify that they are balanced.
  # $1: $article_tmp_post
  # Globals written: stat_p_start, stat_p_close, stat_paragraphs (left
  # untouched when the counts differ).
  stat_p_start=$(grep -E -e '^\($' -e '^\( ' "$1" | wc -l)
  stat_p_close=$(grep -E -e '^\)$' "$1" | wc -l)

  if ((stat_p_start != stat_p_close)); then
    # Not paired
    log__add -e -C -A \
      "Paragraphs mismatch: '('=$stat_p_start ; ')'=$stat_p_close"
  elif ((stat_p_start == 0)); then
    stat_paragraphs=0
    log__add -w -C -A \
      "No paragraphs. To set one: '(' and ')' at begining lines"
  else
    stat_paragraphs=$stat_p_start
  fi
}
#======================================================================
# Check content article : QUOTES
# $1: $article_tmp_post
#======================================================================
check__quotes() {
  # Count quote markers (a line that is "---" or starts with "--- ") and
  # verify that there is an even number of them.
  # $1: $article_tmp_post
  # Globals written: stat_q_nbr, stat_q_paired, stat_quotes (only when the
  # markers are paired).
  stat_q_nbr=$(grep -E -e '^---$' -e '^--- ' "$1" | wc -l)
  stat_q_paired=$((stat_q_nbr % 2))

  # Not paired
  if ((stat_q_paired != 0)); then
    log__add -e -C -A "Quotes mismatch: '---' not paired"
    return
  fi

  # Stats: two markers make one quote
  stat_quotes=$((stat_q_nbr / 2))
}
#======================================================================
# Check content article : LISTS
# $1: $article_tmp_post
#======================================================================
check__lists() {
  # Verify that list openers (lines starting with "<<") and closers (lines
  # starting with ">>") are balanced, and that the first word of every line
  # between them starts with '=' or '+'.
  # $1: $article_tmp_post
  # Globals written: stat_l_start, stat_l_close, ln, content, stat_lists
  # (only when no error is pending in stl_error).
  stat_l_start=$(grep -E "^<<" "$1" | wc -l)
  stat_l_close=$(grep -E "^>>" "$1" | wc -l)

  # Not paired: report the list counters with the list markers
  if ((stat_l_start != stat_l_close)); then
    log__add -e -C -A \
      "Lists mismatch: '<<'=$stat_l_start ; '>>'=$stat_l_close"
    return
  fi

  # Mismatch content after first marker list <<
  if ((stat_l_close >= 1)); then
    # awk prints "<line number> <first word>" for each line inside a list
    while read -r "ln" "content"; do
      article__line # count new ln
      if [[ $content != [=+]* ]]; then
        log__add -e -C -A \
          "Line $ln. Lists mismatch: '$content' not '=' or '+'"
        break # Only the first bad line is reported
      fi
    done < <(awk '/^<</{f=1;next} /^>>/{f=0} f {print NR,$1}' "$1")
  fi
  [[ $stl_error ]] && return

  # Stats
  stat_lists=$stat_l_start
}
#======================================================================
# Check content article : ICODES
# $1: $article_tmp_post
#======================================================================
check__icodes() {
  # Verify, line by line, that inline-code openers "`_" and closers "_`"
  # come in equal numbers.
  # $1: $article_tmp_post
  # Globals written: ln, line, stat_icode_start, stat_icode_close,
  # article_icodes (only when no error is pending in stl_error).
  while IFS=: read -r 'ln' 'line'; do
    article__line
    # start counts the opener "`_" and close counts the closer "_`", so the
    # numbers match the labels printed below (they used to be swapped).
    stat_icode_start=$(grep -o '`_' <<<"$line" | wc -l)
    stat_icode_close=$(grep -o '_`' <<<"$line" | wc -l)
    if ((stat_icode_start != stat_icode_close)); then
      log__add -e -C -A \
        "Line $ln. icodes mismatch: '\`_'=$stat_icode_start ; '_\`'=$stat_icode_close"
    fi
  done < <(grep -n '_`\|`_' "$1")

  # Stats
  [[ $stl_error ]] && return
  article_icodes=$(grep -o '`_' "$1" | wc -l)
}
#======================================================================
# Check content article : STRONGS
# $1: $article_tmp_post
#======================================================================
check__strongs() {
  # Verify, line by line, that strong openers "*_" and closers "_*" come in
  # equal numbers.
  # $1: $article_tmp_post
  # Globals written: ln, line, stat_strong_start, stat_strong_close,
  # article_strongs (only when no error is pending in stl_error).
  while IFS=: read -r 'ln' 'line'; do
    article__line
    stat_strong_start=$(printf '%s\n' "$line" | grep -o '\*_' | wc -l)
    stat_strong_close=$(printf '%s\n' "$line" | grep -o '_\*' | wc -l)
    ((stat_strong_start == stat_strong_close)) && continue
    log__add -e -C -A \
      "Line $ln. strongs mismatch: '*_'=$stat_strong_start ; '_*'=$stat_strong_close"
  done < <(grep -n -e '\*_' -e '_\*' "$1")

  # Stats
  if [[ $stl_error ]]; then
    return
  fi
  article_strongs=$(grep -o '\*_' "$1" | wc -l)
}
#======================================================================
# Check content article : BOLDS
# $1: $article_tmp_post
#======================================================================
check__bolds() {
  # Verify, line by line, that bold openers "+_" and closers "_+" come in
  # equal numbers.
  # $1: $article_tmp_post
  # Globals written: ln, line, stat_bold_start, stat_bold_close,
  # article_bolds (only when no error is pending in stl_error).
  while IFS=: read -r 'ln' 'line'; do
    article__line
    stat_bold_start=$(printf '%s\n' "$line" | grep -o '+_' | wc -l)
    stat_bold_close=$(printf '%s\n' "$line" | grep -o '_+' | wc -l)
    ((stat_bold_start == stat_bold_close)) && continue
    log__add -e -C -A \
      "Line $ln. bolds mismatch: '+_'=$stat_bold_start ; '_+'=$stat_bold_close"
  done < <(grep -n -e '+_' -e '_+' "$1")

  # Stats
  if [[ $stl_error ]]; then
    return
  fi
  article_bolds=$(grep -o '+_' "$1" | wc -l)
}
#======================================================================
# Check content article : EMPHASIS
# $1: $article_tmp_post
#======================================================================
check__emphasis() {
  # Verify, line by line, that emphasis openers "\_" and closers "_\" come
  # in equal numbers.
  # $1: $article_tmp_post
  # Globals written: ln, line, stat_em_start, stat_em_close,
  # article_emphasis (only when no error is pending in stl_error).
  while IFS=: read -r 'ln' 'line'; do
    article__line
    # In the grep patterns '\\' is one literal backslash.
    stat_em_start=$(printf '%s\n' "$line" | grep -o '\\_' | wc -l)
    stat_em_close=$(printf '%s\n' "$line" | grep -o '_\\' | wc -l)
    ((stat_em_start == stat_em_close)) && continue
    log__add -e -C -A \
      "Line $ln. emphasis mismatch: '\\_'=$stat_em_start ; '_\\'=$stat_em_close"
  done < <(grep -n -e '\\_' -e '_\\' "$1")

  # Stats
  if [[ $stl_error ]]; then
    return
  fi
  article_emphasis=$(grep -o '\\_' "$1" | wc -l)
}
#======================================================================
# Check content article : CROSS
# $1: $article_tmp_post
#======================================================================
check__cross() {
  # Verify, line by line, that cross openers "×_" and closers "_×" come in
  # equal numbers.
  # $1: $article_tmp_post
  # Globals written: ln, line, stat_cross_start, stat_cross_close,
  # article_cross (only when no error is pending in stl_error).
  while IFS=: read -r 'ln' 'line'; do
    article__line
    stat_cross_start=$(printf '%s\n' "$line" | grep -o '×_' | wc -l)
    stat_cross_close=$(printf '%s\n' "$line" | grep -o '_×' | wc -l)
    ((stat_cross_start == stat_cross_close)) && continue
    log__add -e -C -A \
      "Line $ln. cross mismatch: '×_'=$stat_cross_start ; '_×'=$stat_cross_close"
  done < <(grep -n -e '×_' -e '_×' "$1")

  # Stats
  if [[ $stl_error ]]; then
    return
  fi
  article_cross=$(grep -o '×_' "$1" | wc -l)
}
#======================================================================
# Check content article : DELS
# $1: $article_tmp_post
#======================================================================
check__dels() {
  # Verify, line by line, that del openers "~_" and closers "_~" come in
  # equal numbers.
  # $1: $article_tmp_post
  # Globals written: ln, line, stat_del_start, stat_del_close,
  # article_dels (only when no error is pending in stl_error).
  while IFS=: read -r 'ln' 'line'; do
    article__line
    stat_del_start=$(printf '%s\n' "$line" | grep -o '~_' | wc -l)
    stat_del_close=$(printf '%s\n' "$line" | grep -o '_~' | wc -l)
    ((stat_del_start == stat_del_close)) && continue
    log__add -e -C -A \
      "Line $ln. dels mismatch: '~_'=$stat_del_start ; '_~'=$stat_del_close"
  done < <(grep -n -e '~_' -e '_~' "$1")

  # Stats
  if [[ $stl_error ]]; then
    return
  fi
  article_dels=$(grep -o '~_' "$1" | wc -l)
}