stl-statilque-litterateur/var/lib/stl/scripts/check__article

718 lines
19 KiB
Text
Raw Normal View History

2022-07-20 19:37:21 +02:00
#!/bin/bash
# Name: Statique Littérateur
# Type: Article Checkers
# file: check__article
# Folder: /var/lib/stl/scripts/
# By echolib (XMPP: im@echolib.re)
# License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007
2022-08-23 16:48:33 +02:00
#------------
# funny stats
#------------
# lines: 717
# functions: 20
#-----------------------------------------------------------------------
#**********************************************************************
2022-07-20 19:37:21 +02:00
#======================================================================
# Argument check: dispatch every argument given after the command name
# $1: check
# $2: arguments
#   all    : calls domain__get, sets check_all, calls update__database
#   -F     : sets check_force
#   *.stl  : handed to article__hash
#   other  : rejected through noarg
# Once all arguments are consumed, checkers() is started when
# uri_article is set
#======================================================================
check__OPTIONS() {
  # Nothing after the command name
  [[ "$2" ]] \
    || noarg "Cannot be empty"

  # Each shift brings the next argument into $2
  while [[ "$2" ]]; do
    case "$2" in
      all)
        domain__get
        check_all=true
        update__database
        ;;
      -F) check_force=true ;;
      *".stl") article__hash "$2" ;;
      *) noarg "$2" "add (ARTICLE)" ;;
    esac
    shift
  done

  [[ "$uri_article" ]] \
    && checkers "$uri_article"
}
#======================================================================
# Get sub uri, split article (head,post)
# Start modules to check article
# $1: article
# Reads  : article_db, check_force, db_exists, need_check, uri_article,
#          domain_dir_articles, stl_error
# Sets   : article_name, uri_folder, article_dir_srv, sub_genuri_srv,
#          article_tmp_db
# Exits the script when the article was already checked and -F is not
# used
#======================================================================
checkers() {
  # Check if article DB exists, compare hashes
  if__article_db "$article_db"

  # Without -F, an article whose DB exists and needs no new check is
  # only reported
  if ! [[ $check_force ]] && [[ $db_exists ]] && ! [[ $need_check ]]; then
    printf '%s\n%s\n%s\n%s\n' \
      "# Article: $uri_article" \
      " Database: $db_file (type stl db $this_article) for content" \
      " Hash: $article_hash" \
      " Status: Already checked (Use -F to force check again)"
    exit
  fi

  # Set article uri
  article_name=$(basename -- "$uri_article")
  # Strip the last path component (no ending / for checkers). Suffix
  # removal is used so that glob characters in the file name, or a
  # folder named like the article, cannot disturb the result
  uri_folder=${uri_article%/*}
  article_dir_srv=${uri_folder/"$domain_dir_articles"}

  # HTML sub uri: one "../" per folder level, "./" at the root
  sub_genuri_srv="${article_dir_srv//[^\/]}" # Keep only the slashes
  if [[ $sub_genuri_srv ]]; then
    sub_genuri_srv=${sub_genuri_srv//\//../}
  else
    sub_genuri_srv="./"
  fi

  split_article "$1"         # Split metas & content
  [[ $stl_error ]] && return # Cancel if no separator

  # Create TMP file for Db, with URI files
  article_tmp_db=$(mktemp)

  # Checkers
  check__metas "$article_tmp_head" # Needed metas
  check__titles "$article_tmp_post"
  check__paragraphs "$article_tmp_post"
  check__quotes "$article_tmp_post"
  check__lists "$article_tmp_post"
  check__icodes "$article_tmp_post"
  check__strongs "$article_tmp_post"
  check__bolds "$article_tmp_post"
  check__emphasis "$article_tmp_post"
  check__cross "$article_tmp_post"
  check__dels "$article_tmp_post"

  [[ $stl_error ]] \
    || log__add -i -C -A "Content Ok"

  # Statistics ; manage__stats
  stats__words "$article_tmp_post"

  # Write to db. db__print is called unconditionally here; the original
  # note says "If no error" - presumably enforced inside db__print,
  # TODO confirm
  db__print

  # No more needed split files (created in split_article())
  rm -f -- "$article_tmp_head" "$article_tmp_post" "$article_tmp_db"
}
#----------------------------------------------------------------------
# Modules that check article contents
#----------------------------------------------------------------------
#======================================================================
# Check NEEDED metas. Called from checkers()
# $1: $article_tmp_head
# Sets : article_Date, article_Title, article_About, article_Author,
#        article_Tags, date_Y, date_M, date_D, false_time, date_epoch
# Resets the per-article file counters, then runs the optional metas
# checks through get__content_metas
#======================================================================
check__metas() {
  # Function to check NEEDED metas and their content
  # Find line with marker, stdout without marker
  # $1: marker
  # $2: $article_tmp_head
  # --------------------
  check__needed_meta() {
    local content
    # Only the first matching line is registered
    if read -r content < <(grep -- "$1" "$2"); then
      awk -F"$1" '{print $2}' <<<"$content"
    fi
  }

  article_Date=$(check__needed_meta "^date: " "$1")
  article_Title=$(check__needed_meta "^title: " "$1")
  article_About=$(check__needed_meta "^about: " "$1")
  article_Author=$(check__needed_meta "^author: " "$1")
  article_Tags=$(check__needed_meta "^tags: " "$1")

  # Analyse Date Syntax, Month & Day
  case "$article_Date" in
    '') log__add -e -C -Mda "No date registred. Use: YYYY-MM-DD" ;;
    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9])
      # The pattern above guarantees these offsets
      date_Y=${article_Date:0:4}
      date_M=${article_Date:5:2}
      date_D=${article_Date:8:2}

      # 10# forces base 10: "08" and "09" would otherwise be read as
      # invalid octal numbers. date_M / date_D keep their leading 0
      (( 10#$date_M > 12 || 10#$date_M < 1 )) \
        && log__add -e -C -Mda "Mismatch Month: '$(( 10#$date_M ))'"
      (( 10#$date_D > 31 || 10#$date_D < 1 )) \
        && log__add -e -C -Mda "Mismatch Day: '$(( 10#$date_D ))'"

      # timestamp date to db (for sidebar sort): article date combined
      # with the current time of day
      false_time=$(date +%T)
      date_epoch="$date_Y-$date_M-$date_D $false_time"
      date_epoch=$(date +%s -u -d "$date_epoch")
      ;;
    *) log__add -e -C -Mda "Mismatch date. Use: YYYY-MM-DD" ;;
  esac

  [[ "$article_Title" ]] || log__add -e -C -Mti "No title registred"
  [[ "$article_About" ]] || log__add -e -C -Mab "No about registred"
  [[ "$article_Author" ]] || log__add -e -C -Mau "No author registred"
  [[ "$article_Tags" ]] || log__add -e -C -Mta "No tags registred"
  [[ $stl_error ]] || log__add -i -C -M "Needed Metas Ok"

  # Init stats for files
  stat_images=0
  stat_link_files=0
  stat_codes=0
  stat_bruts=0
  # NOTE(review): stat_links is incremented by check__link during the
  # pass below, so it is reset here with its siblings - confirm it is
  # not meant to be initialised elsewhere
  stat_links=0

  # Analyse optional metas content. Function is in manage_articles
  get__content_metas "^link: " "$1" check
  get__content_metas "^file: " "$1" check
  get__content_metas "^image: " "$1" check
  get__content_metas "^brut: " "$1" check
  get__content_metas "^code: " "$1" check
  get__content_metas "^abbr: " "$1" check

  [[ $stl_error ]] \
    || log__add -i -C -M "Optional Metas Ok"
}
#======================================================================
# Check content metas LINK
# Called from get__content_metas() in manage__articles
# Reads : meta, header_f1 (NAME), header_f2 (URL), header_f3 (ALT-TEXT),
#         ln, article_tmp_post
# Sets  : stat_links (incremented for each valid link)
#======================================================================
check__link() {
  [[ $meta ]] || return 0

  # Needed meta fields
  if ! [[ "$header_f1" && "$header_f2" ]]; then
    log__add -e -C -Mlt \
      "Line $ln. Mismatch: 'NAME : URL : ALT-TEXT'"
    return
  fi

  # Accessibility alt-text
  if ! [[ "$header_f3" ]]; then
    log__add -w -C -Mlt \
      "Line $ln. Accessibility: please, use ALT-TEXT"
  fi

  # Needed article content. -F: the name is searched literally, so
  # characters such as '.' or '[' in it are not read as a regex
  if ! grep -qF -- "_$header_f1" "$article_tmp_post"; then
    log__add -e -C -A \
      "Line $ln. No content: '_$header_f1'"
    return
  fi

  # Count (plain assignment: always returns status 0)
  stat_links=$(( stat_links + 1 ))
}
#======================================================================
# Common function to check if file exists
# for __image, __precode, __brut __link_file
# $1: generic_file (with @ in FILENAME) | article_file
# $2: Log_f3 (log option, e.g. -Mim ; also selects the generic folder)
# Reads : header_f2, uri_folder, domain_dir_images, domain_dir_files, ln
# Sets  : uri_file ; this_article when the file is missing ;
#         header_f2 (the @ is removed for a generic file)
#======================================================================
check__file_exist() {
  case "$1" in
    "article_file")
      # File stored next to the article
      uri_file="$uri_folder/$header_f2"
      ;;

    "generic_file")
      header_f2=${header_f2/@/}
      # Images have their own folder, every other type uses files
      case "$2" in
        "-Mim") uri_file="$domain_dir_images/$header_f2" ;;
        *) uri_file="$domain_dir_files/$header_f2" ;;
      esac
      ;;

    *) return ;;
  esac

  if ! [[ -f "$uri_file" ]]; then
    # Point to the path that was really tested (not always the images
    # folder)
    this_article="$uri_file"
    log__add -e -C "$2" \
      "Line $ln. File not found: '$header_f2'"
  fi
}
#======================================================================
# Check content metas LINK FILE
# Called from get__content_metas() in manage__articles
# Reads : meta, header_f1 (NAME), header_f2 (FILENAME),
#         header_f3 (ALT-TEXT), ln, article_tmp_post, stl_error
# Sets  : stat_link_files ; appends File_N='uri' to $article_tmp_db
#======================================================================
check__link_file() {
  [[ $meta ]] || return 0

  # Needed meta fields
  if ! [[ "$header_f1" && "$header_f2" ]]; then
    log__add -e -C -Mlf \
      "Line $ln. Mismatch: 'NAME : FILENAME : ALT-TEXT'"
    return
  fi

  # Accessibility alt-text
  if ! [[ "$header_f3" ]]; then
    log__add -w -C -Mlf \
      "Line $ln. Accessibility: please, use ALT-TEXT"
  fi

  # Needed article content. -F: the name is searched literally, not as
  # a regex
  if ! grep -qF -- "__$header_f1" "$article_tmp_post"; then
    log__add -e -C -A \
      "Line $ln. No content: '__$header_f1'"
    return
  fi

  # File exists (a leading @ means a generic file)
  case "$header_f2" in
    "@"*) check__file_exist "generic_file" "-Mlf" ;;
    *) check__file_exist "article_file" "-Mlf" ;;
  esac
  [[ $stl_error ]] && return

  # Count
  stat_link_files=$(( stat_link_files + 1 ))

  # Write to tmp DB
  printf '%s\n' "File_$stat_link_files='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas IMAGE
# Called from get__content_metas() in manage__articles
# Reads : meta, header_f1 (NBR), header_f2 (FILENAME),
#         header_f3 (ALT-TEXT), ln, article_tmp_post, article_begin,
#         stl_error
# Sets  : image_ln_syntax, image_ln, image_syntax, stat_images ;
#         appends Image_N='uri' to $article_tmp_db
#======================================================================
check__image() {
  [[ $meta ]] || return 0

  # Needed meta fields
  if ! [[ "$header_f1" && "$header_f2" && "$header_f3" ]]; then
    log__add -e -C -Mim \
      "Line $ln. Mismatch: 'NBR : FILENAME : ALT-TEXT'"
    return
  fi

  # Needed article content.
  # Several lines can contain "_image:NBR" (NBR 1 also matches
  # "_image:10"): a well-formed line is preferred, else the first match
  # is kept so that its syntax error gets reported
  local image_match
  image_ln_syntax=
  while IFS= read -r image_match; do
    [[ $image_ln_syntax ]] || image_ln_syntax=$image_match
    case "${image_match#*:}" in
      "_image:$header_f1" | "_image:$header_f1 "*)
        image_ln_syntax=$image_match
        break
        ;;
    esac
  done < <(grep -nF -- "_image:$header_f1" "$article_tmp_post")

  # grep -n output is "LINE:CONTENT"
  image_ln=${image_ln_syntax%%:*}
  image_syntax=${image_ln_syntax#*:}
  # Line number shifted by article_begin
  image_ln=$(( image_ln + article_begin - 1 ))

  if ! [[ "$image_syntax" ]]; then
    log__add -e -C -A \
      "Line $ln. No content: '_image:$header_f1'"
    return
  fi

  case "$image_syntax" in
    "_image:$header_f1") true ;;   # No argument
    "_image:$header_f1 "*) true ;; # With arguments
    "_image:$header_f1"*)
      log__add -e -C -A \
        "Line $image_ln. Syntax error: $image_syntax"
      return
      ;;
  esac

  # File exists (a leading @ means a generic file)
  case "$header_f2" in
    "@"*) check__file_exist "generic_file" "-Mim" ;;
    *) check__file_exist "article_file" "-Mim" ;;
  esac
  [[ $stl_error ]] && return

  # Count
  stat_images=$(( stat_images + 1 ))

  # Write to tmp DB
  printf '%s\n' "Image_$stat_images='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas CODE (with <pre>)
# Called from get__content_metas() in manage__articles
# Reads : meta, header_f1 (NBR), header_f2 (FILENAME),
#         header_f3 (ALT-TEXT), ln, article_tmp_post, stl_error
# Sets  : stat_codes ; appends Code_N='uri' to $article_tmp_db
#======================================================================
check__precode() {
  [[ $meta ]] || return 0

  # Needed meta fields
  if ! [[ "$header_f1" && "$header_f2" && "$header_f3" ]]; then
    log__add -e -C -Mpc \
      "Line $ln. Mismatch: 'NBR : FILENAME : ALT-TEXT'"
    return
  fi

  # Needed article content. -F: the field is searched literally, not
  # as a regex
  if ! grep -qF -- "_code:$header_f1" "$article_tmp_post"; then
    log__add -e -C -A \
      "Line $ln. No content: '_code:$header_f1'"
    return
  fi

  # File exists (a leading @ means a generic file)
  case "$header_f2" in
    "@"*) check__file_exist "generic_file" "-Mpc" ;;
    *) check__file_exist "article_file" "-Mpc" ;;
  esac
  [[ $stl_error ]] && return

  # Count
  stat_codes=$(( stat_codes + 1 ))

  # Write to tmp DB
  printf '%s\n' "Code_$stat_codes='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas BRUT
# Called from get__content_metas() in manage__articles
# Reads : meta, header_f1 (NBR), header_f2 (FILENAME), ln,
#         article_tmp_post, stl_error
# Sets  : stat_bruts ; appends Brut_N='uri' to $article_tmp_db
#======================================================================
check__brut() {
  [[ $meta ]] || return 0

  # Needed meta fields
  if ! [[ "$header_f1" && "$header_f2" ]]; then
    log__add -e -C -Mbr \
      "Line $ln. Mismatch: 'NBR : FILENAME'"
    return
  fi

  # Needed article content. -F: the field is searched literally, not
  # as a regex
  if ! grep -qF -- "_brut:$header_f1" "$article_tmp_post"; then
    log__add -e -C -A \
      "Line $ln. No content: '_brut:$header_f1'"
    return
  fi

  # File exists (a leading @ means a generic file)
  case "$header_f2" in
    "@"*) check__file_exist "generic_file" "-Mbr" ;;
    *) check__file_exist "article_file" "-Mbr" ;;
  esac
  [[ $stl_error ]] && return

  # Count
  stat_bruts=$(( stat_bruts + 1 ))

  # Write to tmp DB
  printf '%s\n' "Brut_$stat_bruts='$uri_file'" >> "$article_tmp_db"
}
#======================================================================
# Check content metas ABBR
# Called from get__content_metas() in manage__articles
# Reads : meta, header_f1 (SHORT), header_f2 (Long), ln,
#         article_tmp_post
# Only logs errors: no counter and no DB line are written here
#======================================================================
check__abbr() {
  [[ $meta ]] || return

  # Needed meta fields
  if ! [[ "$header_f1" && "$header_f2" ]]; then
    log__add -e -C -Mab \
      "Line $ln. Mismatch: 'SHORT : Long : option'"
    return
  fi

  # Meta field 1 must be in CAPS. The right side is quoted so it is
  # compared as a string, not matched as a glob pattern
  if ! [[ "$header_f1" == "${header_f1^^}" ]]; then
    log__add -e -C -Mab \
      "Line $ln. $header_f1 must be in CAPITAL"
    return
  fi

  # Needed article content: the abbreviation preceded or followed by a
  # space. -F: searched literally, not as a regex
  if ! grep -qF -e " $header_f1" -e "$header_f1 " "$article_tmp_post"; then
    log__add -e -C -A \
      "Line $ln. No content (abbr): '$header_f1'"
    return
  fi
}
#======================================================================
# Check content article : TITLES (#1-6)
# $1: $article_tmp_post
# A title line starts with '#' directly followed by its level ;
# lines starting with "# " are skipped
# Sets : stat_titles (number of titles with a valid level)
#======================================================================
check__titles() {
  stat_titles=0

  while IFS=: read -r ln content; do
    article__line # count new ln

    tc=${content:3} # Title content
    if ! [[ "$tc" ]]; then
      log__add -e -C -A \
        "Line $ln. Title is empty"
    fi

    tn=${content:1:1} # Title number
    case "$tn" in
      [1-6]) stat_titles=$(( stat_titles + 1 )) ;;
      *)
        log__add -e -C -A \
          "Line $ln. Title mismatch '$tn' ; Use 1-6"
        ;;
    esac
  # Only the line start decides if a line is skipped: a title whose
  # text contains "# " (e.g. "C# today") must still be checked
  done < <(grep -nE '^#([^ ]|$)' "$1")
}
#======================================================================
# Check content article : PARAGRAPHS
# $1: $article_tmp_post
# Counts the opening lines ("(" alone or "( ...") and the closing
# lines (")" alone)
# Sets : stat_p_start, stat_p_close ; stat_paragraphs when both match
#======================================================================
check__paragraphs() {
  stat_p_start=$(grep -E "^\($|^\( " "$1" | wc -l)
  stat_p_close=$(grep -E "^\)$" "$1" | wc -l)

  # Not paired
  if [[ $stat_p_start != $stat_p_close ]]; then
    log__add -e -C -A \
      "Paragraphs mismatch: '('=$stat_p_start ; ')'=$stat_p_close"
    return
  fi

  # Paired, with at least one paragraph
  if (( $stat_p_start != 0 )); then
    stat_paragraphs=$stat_p_start
    return
  fi

  # Paired, but none found: only a warning
  stat_paragraphs=0
  log__add -w -C -A \
    "No paragraphs. To set one: '(' and ')' at begining lines"
}
#======================================================================
# Check content article : QUOTES
# $1: $article_tmp_post
# A quote is delimited by two marker lines ("---" alone or "--- ...")
# Sets : stat_q_nbr, stat_q_paired ; stat_quotes when markers are even
#======================================================================
check__quotes() {
  stat_q_nbr=$(grep -E "^---$|^--- " "$1" | wc -l)
  stat_q_paired=$(( stat_q_nbr % 2 ))

  if (( stat_q_paired == 0 )); then
    # Stats: two markers per quote
    stat_quotes=$(( stat_q_nbr / 2 ))
  else
    # Not paired
    log__add -e -C -A \
      "Quotes mismatch: '---' not paired"
  fi
}
#======================================================================
# Check content article : LISTS
# $1: $article_tmp_post
# A list is opened by a line starting with "<<" and closed by a line
# starting with ">>" ; every line in between must start with '=' or '+'
# Sets : stat_l_start, stat_l_close ; stat_lists when no error is set
#======================================================================
check__lists() {
  stat_l_start=$(grep -c "^<<" "$1")
  stat_l_close=$(grep -c "^>>" "$1")

  # Not paired: report the list markers and their own counters
  if ! [[ "$stat_l_start" == "$stat_l_close" ]]; then
    log__add -e -C -A \
      "Lists mismatch: '<<'=$stat_l_start ; '>>'=$stat_l_close"
    return
  fi

  # Mismatch content after first marker list <<
  if (( stat_l_close >= 1 )); then
    # awk prints "LINE FIRST-WORD" for each line between << and >>
    while read -r ln content; do
      article__line # count new ln
      if [[ "$content" != [=+]* ]]; then
        log__add -e -C -A \
          "Line $ln. Lists mismatch: '$content' not '=' or '+'"
        break # Only the first faulty line is reported
      fi
    done < <(awk '/^<</{f=1;next} /^>>/{f=0} f {print NR,$1}' "$1")
  fi
  [[ $stl_error ]] && return

  # Stats
  stat_lists=$stat_l_start
}
#======================================================================
# Check content article : ICODES
# $1: $article_tmp_post
# On every line holding a marker, the number of '_`' must equal the
# number of '`_'
# Sets : stat_icode_start (count of '_`'), stat_icode_close (count of
#        '`_') ; article_icodes when no error is set
#======================================================================
check__icodes() {
  while IFS=: read -r ln line; do
    article__line
    stat_icode_start=$(grep -o '_`' <<<"$line" | wc -l)
    stat_icode_close=$(grep -o '`_' <<<"$line" | wc -l)
    if (( stat_icode_start != stat_icode_close )); then
      # Each count is printed next to the marker it was counted for
      log__add -e -C -A \
        "Line $ln. icodes mismatch: '_\`'=$stat_icode_start ; '\`_'=$stat_icode_close"
    fi
  done < <(grep -n '_`\|`_' "$1")

  # Stats
  [[ $stl_error ]] && return
  article_icodes=$(grep -o '`_' "$1" | wc -l)
}
#======================================================================
# Check content article : STRONGS
# $1: $article_tmp_post
# On every line holding a marker, '*_' and '_*' must be in equal number
# Sets : stat_strong_start, stat_strong_close ; article_strongs when
#        no error is set
#======================================================================
check__strongs() {
  while IFS=: read -r ln line; do
    article__line
    stat_strong_start=$(grep -o '\*_' <<<"$line" | wc -l)
    stat_strong_close=$(grep -o '_\*' <<<"$line" | wc -l)
    # Paired on this line: nothing to report
    (( stat_strong_start == $stat_strong_close )) \
      || log__add -e -C -A \
        "Line $ln. strongs mismatch: '*_'=$stat_strong_start ; '_*'=$stat_strong_close"
  done < <(grep -n '\*_\|_\*' "$1")

  # Stats, only without error
  if ! [[ $stl_error ]]; then
    article_strongs=$(grep -o '\*_' "$1" | wc -l)
  fi
}
#======================================================================
# Check content article : BOLDS
# $1: $article_tmp_post
# On every line holding a marker, '+_' and '_+' must be in equal number
# Sets : stat_bold_start, stat_bold_close ; article_bolds when no error
#        is set
#======================================================================
check__bolds() {
  while IFS=: read -r ln line; do
    article__line
    stat_bold_start=$(grep -o '+_' <<<"$line" | wc -l)
    stat_bold_close=$(grep -o '_+' <<<"$line" | wc -l)
    # Paired on this line: nothing to report
    (( stat_bold_start == $stat_bold_close )) \
      || log__add -e -C -A \
        "Line $ln. bolds mismatch: '+_'=$stat_bold_start ; '_+'=$stat_bold_close"
  done < <(grep -n '+_\|_+' "$1")

  # Stats, only without error
  if ! [[ $stl_error ]]; then
    article_bolds=$(grep -o '+_' "$1" | wc -l)
  fi
}
#======================================================================
# Check content article : EMPHASIS
# $1: $article_tmp_post
# On every line holding a marker, '\_' and '_\' must be in equal number
# Sets : stat_em_start, stat_em_close ; article_emphasis when no error
#        is set
#======================================================================
check__emphasis() {
  while IFS=: read -r ln line; do
    article__line
    # In these grep patterns, \\ stands for one literal backslash
    stat_em_start=$(grep -o '\\_' <<<"$line" | wc -l)
    stat_em_close=$(grep -o '_\\' <<<"$line" | wc -l)
    # Paired on this line: nothing to report
    (( stat_em_start == $stat_em_close )) \
      || log__add -e -C -A \
        "Line $ln. emphasis mismatch: '\\_'=$stat_em_start ; '_\\'=$stat_em_close"
  done < <(grep -n '\\_\|_\\' "$1")

  # Stats, only without error
  if ! [[ $stl_error ]]; then
    article_emphasis=$(grep -o '\\_' "$1" | wc -l)
  fi
}
#======================================================================
# Check content article : CROSS
# $1: $article_tmp_post
# On every line holding a marker, '×_' and '_×' must be in equal number
# Sets : stat_cross_start, stat_cross_close ; article_cross when no
#        error is set
#======================================================================
check__cross() {
  while IFS=: read -r ln line; do
    article__line
    stat_cross_start=$(grep -o '×_' <<<"$line" | wc -l)
    stat_cross_close=$(grep -o '_×' <<<"$line" | wc -l)
    # Paired on this line: nothing to report
    (( stat_cross_start == $stat_cross_close )) \
      || log__add -e -C -A \
        "Line $ln. cross mismatch: '×_'=$stat_cross_start ; '_×'=$stat_cross_close"
  done < <(grep -n '×_\|_×' "$1")

  # Stats, only without error
  if ! [[ $stl_error ]]; then
    article_cross=$(grep -o '×_' "$1" | wc -l)
  fi
}
#======================================================================
# Check content article : DELS
# $1: $article_tmp_post
# On every line holding a marker, '~_' and '_~' must be in equal number
# Sets : stat_del_start, stat_del_close ; article_dels when no error
#        is set
#======================================================================
check__dels() {
  while IFS=: read -r ln line; do
    article__line
    stat_del_start=$(grep -o '~_' <<<"$line" | wc -l)
    stat_del_close=$(grep -o '_~' <<<"$line" | wc -l)
    # Paired on this line: nothing to report
    (( stat_del_start == $stat_del_close )) \
      || log__add -e -C -A \
        "Line $ln. dels mismatch: '~_'=$stat_del_start ; '_~'=$stat_del_close"
  done < <(grep -n '~_\|_~' "$1")

  # Stats, only without error
  if ! [[ $stl_error ]]; then
    article_dels=$(grep -o '~_' "$1" | wc -l)
  fi
}