#!/bin/bash
# file: gsl__post_checkers
# Folder: /var/lib/gsl/scripts
# By echolib
# License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007
#=======================================================================
# From: gsl__loop_posts | Check Post from modules
#=======================================================================
gsl__post_all_checkers() {
  # Run every post checker, in order, on the current post, then show the
  # warning / error logs.
  # Globals written: gsl_check_done (unset), gsl_process
  # Globals read:    gsl_checker_war, gsl_checker_err (set by the checkers)
  # Returns: early, with the status of gsl__post_check_h1, when that check
  #          fails; otherwise the status of the final `[[ ]] && gsl log`
  #          list (non-zero when no error was flagged).
  clear
  unset gsl_check_done
  gsl_process="Checked"

  # Progress is drawn on one terminal line: "\r" goes back to column 0 and
  # "\033[2K" erases the line. printf always interprets these escapes,
  # whereas `echo -n` (without -e) printed them literally.
  printf 'Searching for #1...'
  gsl__post_check_h1 || return

  printf '\r\033[2K: : Searching for NEEDED HEADERS...'
  gsl__post_check_needed_headers

  printf '\r\033[2K: : Searching for Post TYPE...'
  gsl__post_check_type

  printf '\r\033[2K: Searching for PARAGRAPHS'
  gsl__post_check_paragraphs

  printf '\r\033[2K: Searching for Content MARKERS...'
  gsl__post_check_markers

  printf '\r\033[2K: Searching for BLOCKQUOTEs...'
  gsl__post_check_blockquote

  printf '\r\033[2K: Searching for ABBRs...'
  gsl__post_check_abbr

  printf '\r\033[2K: Searching for LINKs...'
  gsl__post_check_links

  printf '\r\033[2K: Searching for FILES...'
  gsl__post_check_files

  printf '\r\033[2K: Searching for IMAGES...'
  gsl__post_check_images

  printf '\r\033[2K: Searching for BLOCK-CODES...'
  gsl__post_check_bcodes

  printf '\r\033[2K: Searching for some STATS...'
  gsl__post_check_stats

  # Erase the progress line
  printf '\r\033[2K'

  # End of checkers : show logs for war and err
  gsl__db_line_post
  [[ "$gsl_checker_war" ]] && gsl log -s -w
  [[ "$gsl_checker_err" ]] && gsl log -s -e
}
#=======================================================================
# Check Post for MISSING & VALID Content HEADERS
#=======================================================================
gsl__post_check_needed_headers() {
  # Read the needed headers of $gsl_post (title, slug, author, date,
  # description, tags), report the missing ones through
  # gsl__check_needed_headers, then validate author, date and slug.
  # Globals written: gsl_header_title/slug/author/date/info/tags,
  #                  gsl_slug_err, gsl_author_err, gsl_date_err,
  #                  gsl_checker_err, gsl_checker_war
  # Side effect: rewrites the slug header in $gsl_post (sed -i) when the
  #              slug contains spaces.
  # NOTE(review): gsl__get_header and gsl__check_needed_headers are
  # defined elsewhere; the `&& ..._err=true` chaining below assumes the
  # latter returns 0 when the header is MISSING - confirm.
  local gsl_sed_from gsl_sed_to

  # These flags are per post: reset them so a header missing in a
  # previously checked post does not disable validation for this one.
  unset gsl_slug_err gsl_author_err gsl_date_err

  #---------------------------------------------------------------------
  # Check if Missing NEEDED HEADERS
  #---------------------------------------------------------------------
  # Title
  gsl_header_title=$(gsl__get_header "$gsl_marker_title" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_title" \
    "$gsl_marker_title" \
    "POST TITLE"

  # Slug
  gsl_header_slug=$(gsl__get_header "$gsl_marker_slug" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_slug" \
    "$gsl_marker_slug" \
    "POST-TITLE" \
    && gsl_slug_err=true

  # Author
  gsl_header_author=$(gsl__get_header "$gsl_marker_author" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_author" \
    "$gsl_marker_author" \
    "REGISTRED NAME" \
    && gsl_author_err=true

  # Date
  gsl_header_date=$(gsl__get_header "$gsl_marker_date" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_date" \
    "$gsl_marker_date" \
    "YYYY-MM-DD" \
    && gsl_date_err=true

  # Description
  gsl_header_info=$(gsl__get_header "$gsl_marker_info" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_info" \
    "$gsl_marker_info" \
    "DESCRIPTION"

  # Tags
  gsl_header_tags=$(gsl__get_header "$gsl_marker_tags" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_tags" \
    "$gsl_marker_tags" \
    "TAG1,OTHER TAG2,TAG3"

  #---------------------------------------------------------------------
  # Check for VALID Content HEADERS
  #---------------------------------------------------------------------
  # Author registred: the name is searched as a fixed string (-F) so
  # characters such as "." are not interpreted as a regex.
  if ! [[ "$gsl_author_err" ]] \
    && ! grep -qF -- "$gsl_header_author" "$gsl_file_auth_ndd"; then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Header $gsl_marker_author" \
      "$gsl_header_author not registred for domain $gsl_find_domain" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  fi

  # Date Format YYYY-MM-DD
  if ! [[ "$gsl_date_err" ]] \
    && ! [[ "$gsl_header_date" =~ $gsl_test_date ]]; then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Header $gsl_marker_date" \
      "$gsl_header_date not YYYY-MM-DD" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  fi

  # Slug format title-post: spaces are replaced by dashes in the file
  if ! [[ "$gsl_slug_err" ]]; then
    gsl_header_slug_test=${gsl_header_slug// /-}
    if [[ "$gsl_header_slug" != "$gsl_header_slug_test" ]]; then
      gsl_new_header_slug="$gsl_marker_slug$gsl_header_slug_test"
      # Escape regex / replacement metacharacters so the slug text is
      # matched and written literally by sed.
      gsl_sed_from=$(gsl__post_sed_escape '\.*[]^$|' \
        "$gsl_marker_slug$gsl_header_slug")
      gsl_sed_to=$(gsl__post_sed_escape '\&|' "$gsl_new_header_slug")
      sed -i "s|$gsl_sed_from|$gsl_sed_to|" "$gsl_post" \
        && gsl__logs_print \
          "$gsl_log_w" \
          "Post" \
          "Header $gsl_marker_slug" \
          "Changed: $gsl_header_slug_test" \
          "${PWD}/$gsl_post"
      gsl_header_slug=$gsl_header_slug_test
      gsl_checker_war=true
    fi
  fi
}

# Private helper: print $2 with a backslash in front of every character
# that is listed in $1.
gsl__post_sed_escape() {
  local gsl_esc_set=$1 gsl_esc_in=$2 gsl_esc_out="" gsl_esc_ch gsl_esc_n
  for (( gsl_esc_n = 0; gsl_esc_n < ${#gsl_esc_in}; gsl_esc_n++ )); do
    gsl_esc_ch=${gsl_esc_in:gsl_esc_n:1}
    [[ "$gsl_esc_set" == *"$gsl_esc_ch"* ]] && gsl_esc_out+='\'
    gsl_esc_out+=$gsl_esc_ch
  done
  printf '%s' "$gsl_esc_out"
}
#=======================================================================
# Check Post for the beginning of content (#1 title)
#=======================================================================
gsl__post_check_h1() {
  # Find the first line of $gsl_post containing "#1" and store its line
  # number in gsl_post_begin (empty when there is none).
  # Globals written: gsl_post_begin, gsl_checker_err (on missing #1)
  # NOTE(review): this function always returns 0, so a caller written as
  # `gsl__post_check_h1 || return` never returns early - confirm intent.
  local gsl_h1_match

  # -m 1 stops at the first match; the file name is quoted so paths with
  # spaces work. grep -n prints "LINE:content".
  gsl_h1_match=$(grep -n -m 1 -- "#1" "$gsl_post")
  gsl_post_begin=${gsl_h1_match%%:*}

  if [[ -z "$gsl_post_begin" ]]; then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Content begin" \
      "Missing: #1 TITLE" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  fi
}
#=======================================================================
# Check/Set Post TYPE (page or post)
#=======================================================================
gsl__post_check_type() {
  # Make sure $gsl_post declares a type of "page" or "post" in its header
  # area (the lines before gsl_post_begin); default to "post" otherwise.
  # Globals read:    gsl_marker_type, gsl_post, gsl_post_begin
  # Globals written: gsl_post_type, gsl_post_begin (when a line is added)
  # Side effect: edits $gsl_post in place (sed -i).
  if [[ $(awk -v m="$gsl_marker_type" -v l="$gsl_post_begin" \
    'NR < l && $0 ~ m' \
    "$gsl_post") ]]; then
    gsl_post_type=$(gsl__get_header "$gsl_marker_type" "$gsl_post")
    case "$gsl_post_type" in
      page|post)
        ;;
      *)
        # Unknown type: force "post". The substitution is limited to the
        # header lines so body text containing the marker is untouched.
        sed -i \
          "1,$(( gsl_post_begin - 1 ))s|$gsl_marker_type.*|${gsl_marker_type}post|" \
          "$gsl_post"
        gsl_post_type=post
        ;;
    esac
  else
    # No type header: add one as the first line of the file
    sed -i "1i${gsl_marker_type}post" \
      "$gsl_post"
    gsl_post_type=post
    # The content moved down by one line; keep gsl_post_begin in sync
    if [[ "$gsl_post_begin" ]]; then
      gsl_post_begin=$(( gsl_post_begin + 1 ))
    fi
  fi
}
#=======================================================================
# Check for paragraphs ( and )
#=======================================================================
gsl__post_check_paragraphs() {
  # Count the lines after gsl_post_begin whose first field is the
  # paragraph open marker (gsl_mo_p) or close marker (gsl_mc_p), and
  # check that there is at least one paragraph and that both counts match.
  # Globals written: gsl_post_p_open_nbr, gsl_post_p_close_nbr,
  #                  gsl_post_stat_p (on success), gsl_checker_err
  # One awk pass yields both counters ("+ 0" forces a number when none).
  read -r gsl_post_p_open_nbr gsl_post_p_close_nbr < <(
    awk -v line="$gsl_post_begin" \
      -v op="$gsl_mo_p" \
      -v cp="$gsl_mc_p" '
        NR > line && $1 == op { o++ }
        NR > line && $1 == cp { c++ }
        END { print o + 0, c + 0 }' \
      "$gsl_post"
  )
  gsl_post_p_open_nbr=${gsl_post_p_open_nbr:-0}
  gsl_post_p_close_nbr=${gsl_post_p_close_nbr:-0}

  # No paragraph at all
  if (( gsl_post_p_open_nbr == 0 )); then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Content paragraphs" \
      "Missing content: ( and ) at begining lines" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
    return
  fi

  # Open / close markers must be balanced
  if (( gsl_post_p_open_nbr != gsl_post_p_close_nbr )); then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Content paragraphs" \
      "Mismatch: (=$gsl_post_p_open_nbr ; )=$gsl_post_p_close_nbr" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
    return
  fi

  gsl_post_stat_p="$gsl_post_p_open_nbr"
}
#=======================================================================
# Check and count MARKERS content (Strong, links...)
#=======================================================================
gsl__post_check_markers() {
  # For every content line (from gsl_post_begin on) and every marker type,
  # count the open (gsl_o_markers) and close (gsl_c_markers) occurrences;
  # log an error on a per-line mismatch, otherwise add to the statistics.
  # Globals read:    gsl_o_markers, gsl_c_markers, gsl_i_markers,
  #                  gsl_u_markers (parallel arrays, same index = same type)
  # Globals written: gsl_markers_nbr, gsl_post_stat_m, gsl_post_stat_mdb,
  #                  gsl_line_nbr, gsl_o, gsl_c, gsl_checker_err
  # NOTE(review): markers are used as regex by `=~` and `grep -o`; this
  # assumes the array values are already valid patterns - confirm.
  gsl_markers_nbr=$(( ${#gsl_o_markers[@]} - 1 ))
  gsl_post_stat_m=(0 0 0 0 0 0)

  while read -r gsl_post_line; do
    # Lines arrive as "NR: content"; keep the number for the logs
    gsl_line_nbr=${gsl_post_line%%:*}
    # Strong counters belong to one line only: never carry them over
    unset gsl_oS gsl_cS

    for (( gsl_i = 0; gsl_i <= gsl_markers_nbr; gsl_i++ )); do
      [[ "$gsl_post_line" =~ ${gsl_o_markers[gsl_i]} ]] || continue

      # Open Marker by Type
      gsl_o=$(grep -o -- "${gsl_o_markers[gsl_i]}" \
        <<< "$gsl_post_line" \
        | wc -l)
      # Close Marker by Type
      gsl_c=$(grep -o -- "${gsl_c_markers[gsl_i]}" \
        <<< "$gsl_post_line" \
        | wc -l)

      # Strong Vs Bold | 2= **_ / 3= *_ in array
      # The Bold pattern also matches inside Strong markers, so the
      # Strong counts found at index 2 are subtracted at index 3.
      if (( gsl_i == 2 )); then
        gsl_oS=$gsl_o # Open Strong marker nbr
        gsl_cS=$gsl_c # Close Strong marker nbr
      elif (( gsl_i == 3 )); then
        if (( ${gsl_oS:-0} )); then
          gsl_o=$(( gsl_o - gsl_oS ))
          unset gsl_oS
        fi
        if (( ${gsl_cS:-0} )); then
          gsl_c=$(( gsl_c - gsl_cS ))
          unset gsl_cS
        fi
      fi

      # Compare
      if (( gsl_o != gsl_c )); then
        gsl__logs_print \
          "$gsl_log_e" \
          "Post" \
          "${gsl_i_markers[gsl_i]}" \
          "Line:$gsl_line_nbr > Mismatch ${gsl_u_markers[gsl_i]}" \
          "${PWD}/$gsl_post"
        gsl_checker_err=true
      else
        gsl_post_stat_m[gsl_i]=$(( gsl_post_stat_m[gsl_i] + gsl_o ))
      fi
    done
  done < <(awk -v line="$gsl_post_begin" \
    'NR >= line {print NR":",$0}' \
    "$gsl_post")

  # Space-separated copy of the counters
  gsl_post_stat_mdb=${gsl_post_stat_m[@]}

  # Show Counts in logs
  for (( gsl_i = 0; gsl_i <= gsl_markers_nbr; gsl_i++ )); do
    if (( ${gsl_post_stat_m[gsl_i]:-0} >= 1 )); then
      gsl__logs_print \
        "$gsl_log_i" \
        "Post" \
        "${gsl_i_markers[gsl_i]}" \
        "NBR=${gsl_post_stat_m[gsl_i]} > ${gsl_u_markers[gsl_i]}" \
        "${PWD}/$gsl_post"
    fi
  done
}
#=======================================================================
# Check for blockquotes: ---
#=======================================================================
gsl__post_check_blockquote() {
  # Count the lines after gsl_post_begin whose first field is the
  # blockquote marker. Markers work in pairs, so an even count gives
  # count / 2 blockquotes, which is logged as information.
  # Globals written: gsl_post_blockquotes_nbr, gsl_post_blockquotes_nbr_r,
  #                  gsl_post_stat_bq
  # NOTE(review): an odd number of markers is silently ignored (no log,
  # no error) - confirm whether a mismatch error is wanted here.

  # Reset like the other per-post counters, so an odd marker count does
  # not leave the value of a previously checked post.
  gsl_post_stat_bq=0

  gsl_post_blockquotes_nbr=$(awk -v line="$gsl_post_begin" \
    -v bq="$gsl_mark_blockquote" \
    'NR > line && $1 == bq' \
    "$gsl_post" \
    | wc -l)
  gsl_post_blockquotes_nbr_r=$(( gsl_post_blockquotes_nbr % 2 ))

  if (( gsl_post_blockquotes_nbr_r == 0 )); then
    gsl_post_stat_bq=$(( gsl_post_blockquotes_nbr / 2 ))
    gsl__logs_print \
      "$gsl_log_i" \
      "Post" \
      "Content citations" \
      "NBR=$gsl_post_stat_bq > $gsl_mark_blockquote (2x)" \
      "${PWD}/$gsl_post"
  fi
}
#=======================================================================
# Check Post for ABBR HEADER + Content
#=======================================================================
gsl__post_check_abbr() {
  # For each ABBR header line of $gsl_post: validate its fields, then
  # check that the abbreviation (field 1) is used in the post content.
  # Globals written: gsl_post_stat_abbr (number of abbreviations found in
  #                  content), gsl_header_content_line, gsl_checker_err
  # NOTE(review): gsl__get_header_fields is defined elsewhere; this code
  # presumes it fills gsl_post_header_field_1 from
  # gsl_header_content_line - confirm.
  gsl_post_stat_abbr=0

  while read -r gsl_header_content_line; do
    # Get & Check HEADER CONTENT (invalid header lines are skipped)
    gsl__get_header_fields "$gsl_marker_abbr"
    gsl__check_header_fields \
      "Post" \
      "Header $gsl_marker_abbr" \
      "$gsl_log_act_abbr" \
      || continue

    # Check POST CONTENT
    if [[ $(gsl__get_content_line \
      "$gsl_post_header_field_1" \
      "$gsl_post") ]]; then
      gsl_post_stat_abbr=$(( gsl_post_stat_abbr + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_abbr" \
      "Missing: $gsl_post_header_field_1" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_abbr" "$gsl_post")

  # Always report how many abbreviations are used
  gsl__logs_print \
    "$gsl_log_i" \
    "Post" \
    "Header $gsl_marker_abbr" \
    "NBR=$gsl_post_stat_abbr" \
    "${PWD}/$gsl_post"
}
#=======================================================================
# Check Post for LINKs HEADER + Content
#=======================================================================
gsl__post_check_links() {
  # For each LINK header line of $gsl_post: validate its fields, then
  # check that the link reference is used in the post content. For a
  # reference that is not used, the URL (field 2) is probed with curl.
  # Globals written: gsl_post_stat_link (links found in content),
  #                  gsl_header_content_line, gsl_checker_err
  # NOTE(review): as in the original flow, a reference missing from the
  # content is NOT reported when its URL answers 200, and URLs of used
  # references are never probed - confirm this is intended.
  local gsl_http_code

  gsl_post_stat_link=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT (invalid header lines are skipped)
    gsl__get_header_fields "$gsl_marker_link"
    gsl__check_header_fields \
      "Post" \
      "Header $gsl_marker_link" \
      "$gsl_log_act_link" \
      || continue

    # Not ALT TEXT ?
    gsl__check_header_field3 "Post" "Header $gsl_marker_link"

    # Find if ref is in CONTENT
    if [[ $(gsl__get_content_with_markers \
      "$gsl_mo_link" "$gsl_mc_link") ]]; then
      gsl_post_stat_link=$(( gsl_post_stat_link + 1 ))
      continue
    fi

    # Check URL: HEAD request, only the HTTP status code is kept
    gsl_http_code=$(curl -o /dev/null --silent --head --write-out \
      '%{http_code}' \
      "$gsl_post_header_field_2")
    [[ "$gsl_http_code" == 200 ]] && continue

    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Header $gsl_marker_link" \
      "URL offline: $gsl_post_header_field_2" \
      "${PWD}/$gsl_post"

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_link" \
      "Missing: [_${gsl_post_header_field_1}_]" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_link" "$gsl_post")
}
#=======================================================================
# Check Post for FILES HEADER + Content
#=======================================================================
gsl__post_check_files() {
  # For each FILE header line of $gsl_post: validate its fields, check
  # that the file (field 2) exists under $gsl_dir_domain_files, then check
  # that the file reference is used in the post content.
  # Globals written: gsl_post_stat_file (files found in content),
  #                  gsl_header_content_line, gsl_checker_err
  gsl_post_stat_file=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT (invalid header lines are skipped)
    gsl__get_header_fields "$gsl_marker_file"
    gsl__check_header_fields \
      "Post" \
      "Header $gsl_marker_file" \
      "$gsl_log_act_file" \
      || continue

    # Not ALT TEXT ?
    gsl__check_header_field3 "Post" "Header $gsl_marker_file"

    # The file itself must exist in the domain files folder
    if ! [[ -f "$gsl_dir_domain_files/$gsl_post_header_field_2" ]]; then
      gsl__logs_print \
        "$gsl_log_e" \
        "Post" \
        "Header $gsl_marker_file" \
        "Not found: $gsl_post_header_field_2" \
        "$gsl_dir_domain_files/$gsl_post_header_field_2"
      gsl_checker_err=true
    fi

    # Find if ref is in CONTENT
    if [[ $(gsl__get_content_with_markers \
      "$gsl_mo_file" "$gsl_mc_file") ]]; then
      gsl_post_stat_file=$(( gsl_post_stat_file + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_file" \
      "Missing: <_${gsl_post_header_field_1}_>" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_file" "$gsl_post")
}
#=======================================================================
# Check Post for CODES HEADER + Content
#=======================================================================
gsl__post_check_bcodes() {
  # For each CODE header line of $gsl_post: validate its fields, check
  # that the code file (field 2) exists under $gsl_dir_domain_files, then
  # check that "$gsl_mark_code : <field 1>" is used in the post content.
  # Globals written: gsl_post_stat_bcode (block-codes found in content),
  #                  gsl_header_content_line, gsl_checker_err
  gsl_post_stat_bcode=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT (invalid header lines are skipped)
    gsl__get_header_fields "$gsl_marker_code"
    gsl__check_header_fields \
      "Post" \
      "Header $gsl_marker_code" \
      "$gsl_log_act_code" \
      || continue

    # The code file itself must exist in the domain files folder
    if ! [[ -f "$gsl_dir_domain_files/$gsl_post_header_field_2" ]]; then
      gsl__logs_print \
        "$gsl_log_e" \
        "Post" \
        "Header $gsl_marker_code" \
        "Not found: $gsl_post_header_field_2" \
        "$gsl_dir_domain_files/$gsl_post_header_field_2"
      gsl_checker_err=true
    fi

    # Find if ref is in CONTENT
    # (the counter was previously incremented from a misspelled, always
    # empty variable, so it never went above 1)
    if [[ $(gsl__get_content_line \
      "$gsl_mark_code : $gsl_post_header_field_1" \
      "$gsl_post") ]]; then
      gsl_post_stat_bcode=$(( gsl_post_stat_bcode + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_code" \
      "Missing: $gsl_mark_code : ${gsl_post_header_field_1}" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_code" "$gsl_post")
}
#=======================================================================
# Check Post for IMAGES HEADER + Content
#=======================================================================
gsl__post_check_images() {
  # For each IMAGE header line of $gsl_post: validate its fields, check
  # that the image (field 2) exists under $gsl_dir_domain_files, then
  # check that "$gsl_mark_image : <field 1>" is used in the post content.
  # Globals written: gsl_post_stat_image (images found in content),
  #                  gsl_header_content_line, gsl_checker_err
  gsl_post_stat_image=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT (invalid header lines are skipped)
    gsl__get_header_fields "$gsl_marker_image"
    gsl__check_header_fields \
      "Post" \
      "Header $gsl_marker_image" \
      "$gsl_log_act_image" \
      || continue

    # Not ALT TEXT ?
    gsl__check_header_field3 "Post" "Header $gsl_marker_image"

    # The image itself must exist in the domain files folder
    if ! [[ -f "$gsl_dir_domain_files/$gsl_post_header_field_2" ]]; then
      gsl__logs_print \
        "$gsl_log_e" \
        "Post" \
        "Header $gsl_marker_image" \
        "Not found: $gsl_post_header_field_2" \
        "$gsl_dir_domain_files/$gsl_post_header_field_2"
      gsl_checker_err=true
    fi

    # Find if ref is in CONTENT
    if [[ $(gsl__get_content_line \
      "$gsl_mark_image : $gsl_post_header_field_1" \
      "$gsl_post") ]]; then
      gsl_post_stat_image=$(( gsl_post_stat_image + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_image" \
      "Missing: $gsl_mark_image : ${gsl_post_header_field_1}" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_image" "$gsl_post")
}
#=======================================================================
# Check for more STATS
#=======================================================================
gsl__post_check_stats() {
  # Count the words of $gsl_post from line gsl_post_begin to the end and
  # store the result in gsl_post_stat_w.
  # Two words are removed per counted paragraph (gsl_post_stat_p) -
  # presumably the open and close paragraph markers; confirm.
  gsl_post_stat_w=$(awk -v l="$gsl_post_begin" \
    'NR >= l' \
    "$gsl_post" \
    | wc -w)

  # gsl_post_stat_p is only set when the paragraphs check succeeded:
  # default to 0 instead of failing with an arithmetic syntax error.
  gsl_post_stat_w=$(( gsl_post_stat_w - 2 * ${gsl_post_stat_p:-0} ))
}