#!/bin/bash
# file: gsl__post_checkers
# Folder: /var/lib/gsl/scripts
# By echolib
# License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007
#
# Post checkers. Every function here works on the post file named by
# "$gsl_post" and communicates through gsl_* global variables: results
# are stored in globals (gsl_post_begin, gsl_post_stat_*, ...) and
# problems are reported with gsl__logs_print, raising gsl_checker_err
# (errors) or gsl_checker_war (warnings).
#
# Helpers called but not defined in this file: gsl__get_header,
# gsl__check_needed_headers, gsl__get_header_fields,
# gsl__check_header_fields, gsl__check_header_field3,
# gsl__get_content_line, gsl__get_content_with_markers, gsl__logs_print,
# gsl__db_exists, gsl__db_line_file, gsl__db_line_post.
# Variables are deliberately NOT declared local: those helpers
# presumably read/write them (e.g. gsl_post_header_field_1) -- confirm
# before narrowing any scope.

#-----------------------------------------------------------------------
# --------------------------------------------------
# From: gsl__loop_posts | MAIN checkers from modules
# --------------------------------------------------
#-----------------------------------------------------------------------

#=======================================================================
# Run every checker on "$gsl_post", showing a one-line progress status.
# Returns: non-zero (without running the other checkers and without
#          calling gsl__db_line_post) when the post has no "#1" title;
#          otherwise the status of gsl__db_line_post.
#=======================================================================
gsl__all_checkers() {
  clear
  unset gsl_check_done

  printf '%s' ": Searching for #1..."
  # Every other checker needs gsl_post_begin: stop if "#1" is missing.
  gsl__post_check_h1 || return

  # "\r\033[2K" = carriage return + erase line: redraw the status line.
  printf '\r\033[2K%s' ": Searching for NEEDED HEADERS..."
  gsl__post_check_needed_headers

  printf '\r\033[2K%s' ": Searching for Post TYPE..."
  gsl__post_check_type

  printf '\r\033[2K%s' ": Searching for PARAGRAPHS"
  gsl__post_check_paragraphs

  printf '\r\033[2K%s' ": Searching for Content MARKERS..."
  gsl__post_check_markers

  printf '\r\033[2K%s' ": Searching for BLOCKQUOTEs..."
  gsl__post_check_blockquote

  printf '\r\033[2K%s' ": Searching for ABBRs..."
  gsl__post_check_abbr

  printf '\r\033[2K%s' ": Searching for LINKs..."
  gsl__post_check_links

  printf '\r\033[2K%s' ": Searching for FILES..."
  gsl__post_check_files

  printf '\r\033[2K%s' ": Searching for IMAGES..."
  gsl__post_check_images

  printf '\r\033[2K%s' ": Searching for BLOCK-CODES..."
  gsl__post_check_bcodes

  printf '\r\033[2K%s' ": Searching for some STATS..."
  gsl__post_check_stats

  printf '\r\033[2K'

  # End of checkers : show logs for war and err
  gsl__db_line_post
}

#-----------------------------------------------------------------------
# -------------------------------------------------
# Modules checkers ; Get Datas and check for errors
# -------------------------------------------------
#-----------------------------------------------------------------------

#=======================================================================
# Check Post for MISSING & VALID Content HEADERS
# Reads : gsl_post, gsl_marker_{title,slug,author,date,info,tags},
#         gsl_file_auth_ndd, gsl_test_date, gsl_find_domain
# Sets  : gsl_header_{title,slug,author,date,info,tags},
#         gsl_{slug,author,date}_err, gsl_checker_err, gsl_checker_war
# May rewrite the slug header of "$gsl_post" in place (spaces -> "-").
#=======================================================================
gsl__post_check_needed_headers() {
  # These flags are globals: clear what a previous post may have left,
  # otherwise the validity checks below would be skipped for this post.
  unset gsl_slug_err gsl_author_err gsl_date_err

  #---------------------------------------------------------------------
  # Check if Missing NEEDED HEADERS
  # gsl__check_needed_headers presumably returns 0 when the header is
  # missing (its success is what raises the *_err flags) -- confirm.
  #---------------------------------------------------------------------
  # Title
  gsl_header_title=$(gsl__get_header "$gsl_marker_title" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_title" \
    "$gsl_marker_title" \
    "POST TITLE"

  # Slug
  gsl_header_slug=$(gsl__get_header "$gsl_marker_slug" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_slug" \
    "$gsl_marker_slug" \
    "POST-TITLE" \
    && gsl_slug_err=true

  # Author
  gsl_header_author=$(gsl__get_header "$gsl_marker_author" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_author" \
    "$gsl_marker_author" \
    "REGISTRED NAME" \
    && gsl_author_err=true

  # Date
  gsl_header_date=$(gsl__get_header "$gsl_marker_date" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_date" \
    "$gsl_marker_date" \
    "YYYY-MM-DD" \
    && gsl_date_err=true

  # Description
  gsl_header_info=$(gsl__get_header "$gsl_marker_info" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_info" \
    "$gsl_marker_info" \
    "DESCRIPTION"

  # Tags
  gsl_header_tags=$(gsl__get_header "$gsl_marker_tags" "$gsl_post")
  gsl__check_needed_headers \
    "$gsl_header_tags" \
    "$gsl_marker_tags" \
    "TAG1,OTHER TAG2,TAG3"

  #---------------------------------------------------------------------
  # Check for VALID Content HEADERS
  #---------------------------------------------------------------------
  # Author must appear in the registered-authors file. The author is
  # used as a grep pattern (regex), as it always was.
  if [[ -z "$gsl_author_err" ]] \
    && ! grep -q -- "$gsl_header_author" "$gsl_file_auth_ndd"; then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Header $gsl_marker_author" \
      "$gsl_header_author not registred for domain $gsl_find_domain" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  fi

  # Date Format YYYY-MM-DD ($gsl_test_date is a regex: keep it unquoted)
  if [[ -z "$gsl_date_err" ]] \
    && ! [[ "$gsl_header_date" =~ $gsl_test_date ]]; then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Header $gsl_marker_date" \
      "$gsl_header_date not YYYY-MM-DD" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  fi

  # Slug format title-post: spaces are replaced by "-" in the post file
  if [[ -z "$gsl_slug_err" ]]; then
    gsl_header_slug_test=${gsl_header_slug// /-}
    if [[ "$gsl_header_slug" != "$gsl_header_slug_test" ]]; then
      gsl_new_header_slug="$gsl_marker_slug$gsl_header_slug_test"
      # NOTE(review): a slug containing "|" or regex metacharacters
      # would break this substitution.
      if sed -i \
        "s|$gsl_marker_slug$gsl_header_slug|$gsl_new_header_slug|" \
        "$gsl_post"; then
        gsl__logs_print \
          "$gsl_log_w" \
          "Post" \
          "Header $gsl_marker_slug" \
          "Changed: $gsl_header_slug_test" \
          "${PWD}/$gsl_post"
      fi
      gsl_header_slug=$gsl_header_slug_test
      gsl_checker_war=true
    fi
  fi
}

#=======================================================================
# Check Post for beginning of content
# Sets  : gsl_post_begin = number of the first line containing "#1"
# Returns: 0 when found; 1 (after logging an error and raising
#          gsl_checker_err) when the post has no "#1" line.
#=======================================================================
gsl__post_check_h1() {
  gsl_post_begin=$(awk '/#1/ { print NR; exit }' "$gsl_post")

  if [[ -z "$gsl_post_begin" ]]; then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Content begin" \
      "Missing: #1 TITLE" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
    # Callers test this status (gsl__post_check_h1 || return).
    return 1
  fi
}

#=======================================================================
# Check/Set for Post ID (type header: "page" or "post")
# Reads : gsl_post, gsl_marker_type, gsl_post_begin
# Sets  : gsl_post_type; may shift gsl_post_begin
# A missing or invalid type is forced to "post" in the post file.
#=======================================================================
gsl__post_check_type() {
  # Is there a type header above the "#1" line?
  if [[ -n "$(awk -v m="$gsl_marker_type" -v l="$gsl_post_begin" \
    'NR < l && $0 ~ m' \
    "$gsl_post")" ]]; then
    gsl_post_type=$(gsl__get_header "$gsl_marker_type" "$gsl_post")
    case "$gsl_post_type" in
      page | post) ;;
      *)
        # Unknown type: rewrite the header and keep the variable in sync
        sed -i "s|$gsl_marker_type.*|${gsl_marker_type}post|" \
          "$gsl_post" \
          && gsl_post_type=post
        ;;
    esac
  else
    # No type header: insert one as first line (GNU sed one-line "i").
    # Every line moves down by one, so the "#1" line number must follow.
    if sed -i "1i${gsl_marker_type}post" "$gsl_post"; then
      gsl_post_type=post
      gsl_post_begin=$(( gsl_post_begin + 1 ))
    fi
  fi
}

#=======================================================================
# Check for paragraphs ( and )
# Counts the lines after "#1" whose first field is the opening
# ($gsl_mo_p) or closing ($gsl_mc_p) paragraph marker.
# Sets  : gsl_post_p_open_nbr, gsl_post_p_close_nbr,
#         gsl_post_stat_p (paragraph count; 0 when the check fails)
#=======================================================================
gsl__post_check_paragraphs() {
  # Reset, like the other checkers do: on error, a count left by a
  # previous post must not be reused by gsl__post_check_stats.
  gsl_post_stat_p=0

  gsl_post_p_open_nbr=$(
    awk -v line="$gsl_post_begin" \
      -v op="$gsl_mo_p" \
      'NR > line && $1 == op { n++ } END { print n + 0 }' \
      "$gsl_post"
  )

  gsl_post_p_close_nbr=$(
    awk -v line="$gsl_post_begin" \
      -v cp="$gsl_mc_p" \
      'NR > line && $1 == cp { n++ } END { print n + 0 }' \
      "$gsl_post"
  )

  if (( gsl_post_p_open_nbr == 0 )); then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Content paragraphs" \
      "Missing content: ( and ) at begining lines" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
    return
  fi

  if (( gsl_post_p_open_nbr != gsl_post_p_close_nbr )); then
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Content paragraphs" \
      "Mismatch: (=$gsl_post_p_open_nbr ; )=$gsl_post_p_close_nbr" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
    return
  fi

  gsl_post_stat_p=$gsl_post_p_open_nbr
}

#=======================================================================
# Check and count MARKERS content (Strong, links...)
# For each line from "#1" on, and each marker type i, the number of
# opening markers (gsl_o_markers[i]) must equal the number of closing
# ones (gsl_c_markers[i]) on that same line.
# Reads : gsl_o_markers, gsl_c_markers, gsl_i_markers, gsl_u_markers
# Sets  : gsl_post_stat_m (count per marker type),
#         gsl_post_stat_mdb (same counts joined as one string)
#=======================================================================
gsl__post_check_markers() {
  gsl_markers_nbr=$(( ${#gsl_o_markers[@]} - 1 ))
  gsl_post_stat_m=(0 0 0 0 0 0)

  # Lines are fed as "NR: content" by the awk at the end of the loop
  while read -r gsl_post_line; do
    gsl_line_nbr=${gsl_post_line%%:*}
    # Strong counts are only meaningful within one line
    unset gsl_oS gsl_cS

    for (( gsl_i = 0; gsl_i <= gsl_markers_nbr; gsl_i++ )); do
      [[ "$gsl_post_line" =~ ${gsl_o_markers[gsl_i]} ]] || continue

      # Open Marker by Type (grep -o | wc -l counts occurrences,
      # not matching lines)
      gsl_o=$(
        grep -o -- "${gsl_o_markers[gsl_i]}" <<< "$gsl_post_line" \
          | wc -l
      )

      # Close Marker by Type
      gsl_c=$(
        grep -o -- "${gsl_c_markers[gsl_i]}" <<< "$gsl_post_line" \
          | wc -l
      )

      # Strong Vs Bold | 2= **_ / 3= *_ in array
      # A bold pattern also hits every strong marker: subtract them.
      if (( gsl_i == 2 )); then
        gsl_oS=$gsl_o # Open Strong marker nbr
        gsl_cS=$gsl_c # Close Strong marker nbr
      elif (( gsl_i == 3 )); then
        # Count difference from Strong
        if (( gsl_oS )); then
          gsl_o=$(( gsl_o - gsl_oS ))
          unset gsl_oS
        fi
        if (( gsl_cS )); then
          gsl_c=$(( gsl_c - gsl_cS ))
          unset gsl_cS
        fi
      fi

      # Compare
      if (( gsl_o != gsl_c )); then
        gsl__logs_print \
          "$gsl_log_e" \
          "Post" \
          "${gsl_i_markers[gsl_i]}" \
          "Line:$gsl_line_nbr > Mismatch ${gsl_u_markers[gsl_i]}" \
          "${PWD}/$gsl_post"
        gsl_checker_err=true
      else
        gsl_post_stat_m[gsl_i]=$(( gsl_post_stat_m[gsl_i] + gsl_o ))
      fi
    done
  done < <(awk -v line="$gsl_post_begin" \
    'NR >= line {print NR":",$0}' \
    "$gsl_post")

  # Counts joined into a single string
  # shellcheck disable=SC2124 -- scalar join is intended
  gsl_post_stat_mdb=${gsl_post_stat_m[@]}

  # Show Counts in logs
  for (( gsl_i = 0; gsl_i <= gsl_markers_nbr; gsl_i++ )); do
    # Bare array reference: an unset slot counts as 0
    if (( gsl_post_stat_m[gsl_i] >= 1 )); then
      gsl__logs_print \
        "$gsl_log_i" \
        "Post" \
        "${gsl_i_markers[gsl_i]}" \
        "NBR=${gsl_post_stat_m[gsl_i]} > ${gsl_u_markers[gsl_i]}" \
        "${PWD}/$gsl_post"
    fi
  done
}

#=======================================================================
# Check for blockquotes: ---
# Counts the lines after "#1" whose first field is
# $gsl_mark_blockquote; two markers delimit one blockquote.
# Sets  : gsl_post_blockquotes_nbr, gsl_post_blockquotes_nbr_r,
#         gsl_post_stat_bq (blockquote count; 0 when markers are odd)
#=======================================================================
gsl__post_check_blockquote() {
  # Reset so that an odd marker count does not keep a previous value
  gsl_post_stat_bq=0

  gsl_post_blockquotes_nbr=$(
    awk -v line="$gsl_post_begin" \
      -v bq="$gsl_mark_blockquote" \
      'NR > line && $1 == bq { n++ } END { print n + 0 }' \
      "$gsl_post"
  )

  gsl_post_blockquotes_nbr_r=$(( gsl_post_blockquotes_nbr % 2 ))

  # NOTE(review): an odd number of markers is silently ignored here
  # (nothing logged, no error raised) -- confirm this is intended.
  if (( gsl_post_blockquotes_nbr_r == 0 )); then
    gsl_post_stat_bq=$(( gsl_post_blockquotes_nbr / 2 ))
    gsl__logs_print \
      "$gsl_log_i" \
      "Post" \
      "Content citations" \
      "NBR=$gsl_post_stat_bq > $gsl_mark_blockquote (2x)" \
      "${PWD}/$gsl_post"
  fi
}

#=======================================================================
# Check Post for ABBR HEADER + Content
# For every abbr header line: validate its fields, then require that
# field 1 is found in the post by gsl__get_content_line.
# Sets  : gsl_post_stat_abbr (abbr headers found in content)
#=======================================================================
gsl__post_check_abbr() {
  gsl_post_stat_abbr=0

  while read -r gsl_header_content_line; do
    # Get & Check HEADER CONTENT
    # (presumably fills gsl_post_header_field_* from
    # gsl_header_content_line -- confirm in the helper)
    gsl__get_header_fields "$gsl_marker_abbr"
    gsl__check_header_fields \
      "Post" "Header $gsl_marker_abbr" "$gsl_log_act_abbr" \
      || continue

    # Check POST CONTENT
    if [[ -n "$(gsl__get_content_line \
      "$gsl_post_header_field_1" \
      "$gsl_post")" ]]; then
      gsl_post_stat_abbr=$(( gsl_post_stat_abbr + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_abbr" \
      "Missing: $gsl_post_header_field_1" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_abbr" "$gsl_post")

  gsl__logs_print \
    "$gsl_log_i" \
    "Post" \
    "Header $gsl_marker_abbr" \
    "NBR=$gsl_post_stat_abbr" \
    "${PWD}/$gsl_post"
}

#=======================================================================
# Check Post for LINKs HEADER + Content
# For every link header line: validate its fields, then require that
# the link is referenced in the content. When it is not, the URL
# (field 2) is probed with an HTTP HEAD request.
# Sets  : gsl_post_stat_link (link headers found in content)
#=======================================================================
gsl__post_check_links() {
  local http_code

  gsl_post_stat_link=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT
    gsl__get_header_fields "$gsl_marker_link"
    gsl__check_header_fields \
      "Post" "Header $gsl_marker_link" "$gsl_log_act_link" \
      || continue

    # Not ALT TEXT ?
    gsl__check_header_field3 "Post" "Header $gsl_marker_link"

    # Find if ref is in CONTENT
    if [[ -n "$(gsl__get_content_with_markers \
      "$gsl_mo_link" "$gsl_mc_link")" ]]; then
      gsl_post_stat_link=$(( gsl_post_stat_link + 1 ))
      continue
    fi

    # Check URL: only an exact 200 counts as online (redirects do not)
    http_code=$(curl -o /dev/null --silent --head --write-out \
      '%{http_code}' \
      "$gsl_post_header_field_2")

    # NOTE(review): a reachable URL skips the "Missing" error below
    # even though the link is not referenced in the content. Kept as
    # it always behaved -- confirm this is intended.
    if [[ "$http_code" == "200" ]]; then
      continue
    fi

    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "Header $gsl_marker_link" \
      "URL offline: $gsl_post_header_field_2" \
      "${PWD}/$gsl_post"

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_link" \
      "Missing: [_${gsl_post_header_field_1}_]" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_link" "$gsl_post")
}

#=======================================================================
# Check Post for FILES HEADER + Content
# For every file header line: validate its fields, require the file
# (field 2) under $gsl_dir_domain_files and register it with its
# checksum, then require that the file is referenced in the content.
# Sets  : gsl_post_stat_file, gsl_file_csum; appends to
#         gsl_db_post_files ("name|" per existing file)
#=======================================================================
gsl__post_check_files() {
  gsl_post_stat_file=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT
    gsl__get_header_fields "$gsl_marker_file"
    gsl__check_header_fields \
      "Post" "Header $gsl_marker_file" "$gsl_log_act_file" \
      || continue

    # Not ALT TEXT ?
    gsl__check_header_field3 "Post" "Header $gsl_marker_file"

    if [[ -f "$gsl_dir_domain_files/$gsl_post_header_field_2" ]]; then
      # Register file to DB with hash
      gsl_file_csum=$(
        cksum "$gsl_dir_domain_files/$gsl_post_header_field_2" \
          | awk '{print $1}'
      )
      gsl_db_post_files+="$gsl_post_header_field_2|"
      gsl__db_line_file
    else
      gsl__logs_print \
        "$gsl_log_e" \
        "Post" \
        "Header $gsl_marker_file" \
        "Not found: $gsl_post_header_field_2" \
        "$gsl_dir_domain_files/$gsl_post_header_field_2"
      gsl_checker_err=true
    fi

    # Find if ref is in CONTENT
    if [[ -n "$(gsl__get_content_with_markers \
      "$gsl_mo_file" "$gsl_mc_file")" ]]; then
      gsl_post_stat_file=$(( gsl_post_stat_file + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_file" \
      "Missing: <_${gsl_post_header_field_1}_>" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_file" "$gsl_post")
}

#=======================================================================
# Check Post for CODES HEADER + Content
# For every code header line: validate its fields, require the file
# (field 2) under $gsl_dir_domain_files and register it with its
# checksum, then require a "$gsl_mark_code : <field 1>" content line.
# Sets  : gsl_post_stat_bcode, gsl_file_csum; appends to
#         gsl_db_post_files ("name|" per existing file)
#=======================================================================
gsl__post_check_bcodes() {
  gsl_post_stat_bcode=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT
    gsl__get_header_fields "$gsl_marker_code"
    gsl__check_header_fields \
      "Post" "Header $gsl_marker_code" "$gsl_log_act_code" \
      || continue

    if [[ -f "$gsl_dir_domain_files/$gsl_post_header_field_2" ]]; then
      # Register file to DB with hash
      gsl_file_csum=$(
        cksum "$gsl_dir_domain_files/$gsl_post_header_field_2" \
          | awk '{print $1}'
      )
      gsl_db_post_files+="$gsl_post_header_field_2|"
      gsl__db_line_file
    else
      gsl__logs_print \
        "$gsl_log_e" \
        "Post" \
        "Header $gsl_marker_code" \
        "Not found: $gsl_post_header_field_2" \
        "$gsl_dir_domain_files/$gsl_post_header_field_2"
      gsl_checker_err=true
    fi

    # Find if ref is in CONTENT
    if [[ -n "$(gsl__get_content_line \
      "$gsl_mark_code : $gsl_post_header_field_1" \
      "$gsl_post")" ]]; then
      gsl_post_stat_bcode=$(( gsl_post_stat_bcode + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_code" \
      "Missing: $gsl_mark_code : ${gsl_post_header_field_1}" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_code" "$gsl_post")
}

#=======================================================================
# Check Post for IMAGES HEADER + Content
# For every image header line: validate its fields, require the file
# (field 2) under $gsl_dir_domain_files and register it with its
# checksum, then require a "$gsl_mark_image : <field 1>" content line.
# Sets  : gsl_post_stat_image, gsl_file_csum; appends to
#         gsl_db_post_files ("name|" per existing file)
#=======================================================================
gsl__post_check_images() {
  gsl_post_stat_image=0

  while read -r gsl_header_content_line; do
    # Get & Check Header CONTENT
    gsl__get_header_fields "$gsl_marker_image"
    gsl__check_header_fields \
      "Post" "Header $gsl_marker_image" "$gsl_log_act_image" \
      || continue

    # Not ALT TEXT ?
    gsl__check_header_field3 "Post" "Header $gsl_marker_image"

    if [[ -f "$gsl_dir_domain_files/$gsl_post_header_field_2" ]]; then
      # Register file to DB with hash
      gsl_file_csum=$(
        cksum "$gsl_dir_domain_files/$gsl_post_header_field_2" \
          | awk '{print $1}'
      )
      gsl_db_post_files+="$gsl_post_header_field_2|"
      # NOTE(review): only the images checker calls gsl__db_exists
      # before gsl__db_line_file; the files and block-codes checkers
      # do not -- confirm which one is right.
      gsl__db_exists "$gsl_file_db_files" "$gsl_post_header_field_2"
      gsl__db_line_file
    else
      gsl__logs_print \
        "$gsl_log_e" \
        "Post" \
        "Header $gsl_marker_image" \
        "Not found: $gsl_post_header_field_2" \
        "$gsl_dir_domain_files/$gsl_post_header_field_2"
      gsl_checker_err=true
    fi

    # Find if ref is in CONTENT
    if [[ -n "$(gsl__get_content_line \
      "$gsl_mark_image : $gsl_post_header_field_1" \
      "$gsl_post")" ]]; then
      gsl_post_stat_image=$(( gsl_post_stat_image + 1 ))
      continue
    fi

    # Missing CONTENT
    gsl__logs_print \
      "$gsl_log_e" \
      "Post" \
      "$gsl_log_c_image" \
      "Missing: $gsl_mark_image : ${gsl_post_header_field_1}" \
      "${PWD}/$gsl_post"
    gsl_checker_err=true
  done < <(gsl__get_header "$gsl_marker_image" "$gsl_post")
}

#=======================================================================
# Check for more STATS
# Sets  : gsl_post_stat_w = words from the "#1" line to the end of the
#         post, minus two per counted paragraph (each paragraph has
#         one opening and one closing marker word).
#=======================================================================
gsl__post_check_stats() {
  gsl_post_stat_w=$(
    awk -v l="$gsl_post_begin" \
      'NR >= l' \
      "$gsl_post" \
      | wc -w
  )

  # Bare names inside $(( )): an unset paragraph count evaluates to 0
  # instead of producing the syntax error "2*".
  gsl_post_stat_w=$(( gsl_post_stat_w - 2 * gsl_post_stat_p ))
}