#!/bin/sh
#
# Script that handles the wiki: it mainly creates autogenerated pages and
# converts the wiki to various formats.

# Check the external tools up front. "command -v" only looks the program up
# instead of running it, so the result doesn't depend on how a given tool
# reacts to (or what it returns for) a -h flag. Diagnostics go to stderr.

# pandoc is mandatory, without it there is nothing to do.
if ! command -v pandoc > /dev/null 2>&1; then
  echo "You need to install pandoc." >&2
  exit 1
fi

# dadadodo is optional, only the random text generation depends on it.
if ! command -v dadadodo > /dev/null 2>&1; then
  echo "WARNING: you don't have dadadodo installed, random text generation won't work." >&2
  sleep 3 # leave the warning on screen for a moment before output scrolls on
fi

# NOTE: we are using pandoc to convert MD to HTML. You can use another
# converter if you want -- there are many -- but be careful: there is a
# plethora of MD flavors and most other implementations will break SOME pages
# if just dropped in here. To check whether the output is fine with another
# markdown implementation, check out the wiki_test.md page.
#
# The "-name" suffixes appended to the input format switch the named pandoc
# markdown extensions off. The command is kept in a plain string and expanded
# unquoted where it is used, so that it splits into the program and its flags.
MD_CMD="pandoc -f markdown-tex_math_dollars-smart-footnotes-inline_notes -t html"

# ======== autogenerate pages ========

if true; then # can quickly be disabled here
  echo "making autogenerated pages"

  # ---------------- wiki_stats.md ----------------
  # The stats file is created first, so every "*.md" glob below sees it too.

  FILE_NAME="wiki_stats.md"

  printf "# LRS Wiki Stats\n\nThis is an autogenerated article holding stats about this wiki.\n\n" > "$FILE_NAME"

  # Every stat is a label printed without a newline, followed by a command
  # appending the value. The sed strips blanks from the wc output.

  printf -- "- number of articles: " >> "$FILE_NAME"
  ls *.md | wc -l | sed "s/ *//g" >> "$FILE_NAME"

  printf -- "- number of commits: " >> "$FILE_NAME"

  if test -e .git; then
    git rev-list --count --all >> "$FILE_NAME"
  else
    echo "not a git repo :/" >> "$FILE_NAME"
  fi

  printf -- "- total size of all texts in bytes: " >> "$FILE_NAME"
  cat *.md | wc -c | sed "s/ *//g" >> "$FILE_NAME"

  printf -- "- total number of lines of article texts: " >> "$FILE_NAME"
  cat *.md | wc -l | sed "s/ *//g" >> "$FILE_NAME"

  printf -- "- number of script lines: " >> "$FILE_NAME"
  cat *.sh | wc -l | sed "s/ *//g" >> "$FILE_NAME"

  # The word has to be followed by "s", space, period, comma or the end of the
  # line. (Inside a bracket expression "\n" is NOT a newline but a literal
  # backslash and "n", which counted e.g. "personnel" and missed the word at
  # the end of a line, hence the explicit "$" alternative.)
  printf -- "- occurrences of the word \"person\": " >> "$FILE_NAME"
  grep -o -i -E 'person([s .,]|$)' *.md | wc -l | sed "s/ *//g" >> "$FILE_NAME"

  printf -- "- occurrences of the word \"nigger\": " >> "$FILE_NAME"
  grep -o -i "[^\[]nigger" *.md | wc -l | sed "s/ *//g" >> "$FILE_NAME"

  # ls -S sorts by size (biggest first), -s and -h add a human readable size.
  printf "\nlongest articles:\n\n\`\`\`\n" >> "$FILE_NAME"
  ls -1hSs *.md | head -n 20 >> "$FILE_NAME"
  printf "\n\`\`\`\n" >> "$FILE_NAME"

  echo "counting most common words"

  printf "top 50 5+ letter words:\n\n\`\`\`\n" >> "$FILE_NAME"

  # Drop file names, put every word on its own line, keep those with at least
  # 5 characters, lowercase them and count the duplicates.
  cat *.md | sed "s/[^ ]*\.md/ /g" | tr -cs "[:alpha:]" "\n" | grep "......*" | \
    tr "A-Z" "a-z" | sort | uniq -c | sort -nr | head -n 50 >> "$FILE_NAME"

  printf "\`\`\`\n" >> "$FILE_NAME"

  printf "latest changes:\n\n\`\`\`\n" >> "$FILE_NAME"

  if test -e .git; then
    git log --name-only | head -n 50 | grep -e "\.md" -e "Date:" | sed "s/  */ /g" | sed "s/\([^ ]*\.md\)/  \1/g" >> "$FILE_NAME"
  else
    echo "not a git repo :/" >> "$FILE_NAME"
  fi

  printf "\`\`\`\n" >> "$FILE_NAME"

  echo "counting most wanted pages"

  printf "\nmost wanted pages:\n\n" >> "$FILE_NAME"

  # _tmp.txt: "COUNT TARGET.md" for every link target, most linked first.
  cat *.md | grep -o "([^\(]*\.md)" | sed "s/[\(\)]//g" | sort | uniq -c | sort -nr > _tmp.txt

  # Wanted pages = link targets for which no file exists.
  : > _tmp2.txt

  while read -r line; do
    FNAME=$(printf '%s\n' "$line" | grep -o "[^ ]*\.md")

    if ! test -f "$FNAME"; then
      printf '%s\n' "- [$line]($FNAME)" >> _tmp2.txt
    fi
  done < _tmp.txt

  head -n 20 _tmp2.txt >> "$FILE_NAME"

  printf "\nmost popular and lonely pages:\n\n\`\`\`\n" >> "$FILE_NAME"

  # Popular/lonely pages = link targets that DO exist; the head of the list
  # holds the most linked ones, the tail the least linked ones.
  : > _tmp2.txt

  while read -r line; do
    FNAME=$(printf '%s\n' "$line" | grep -o "[^ ]*\.md")

    # The operand has to be quoted: with an empty, multi-word or glob-like
    # value an unquoted one makes test misbehave instead of answering "no".
    if test -f "$FNAME"; then
      printf '%s\n' "$line" >> _tmp2.txt
    fi
  done < _tmp.txt

  head -n 30 _tmp2.txt >> "$FILE_NAME"
  printf "...\n" >> "$FILE_NAME"
  tail -n 30 _tmp2.txt >> "$FILE_NAME"

  printf "\n\`\`\`\n\nMarkov chain random text by dadadodo:\n\n" >> "$FILE_NAME"

  # Training text: links replaced by their text, and only lines that start
  # with a letter and are more than 100 characters long.
  cat *.md | sed "s/\[\([^]]*\)\]([^)]*)/\1/g" | grep "^[A-Za-z].\{100,\}" > _tmp.txt
  dadadodo -c 8 _tmp.txt 2> /dev/null | sed "s/^ *\([^ ]\)/\1/g" | sed "s/  */ /g" | fmt -w 1111 >> "$FILE_NAME"

  # ---------------- random_page.md ----------------

  echo "making random page links"

  printf "# Random Article\n\nPlease kindly click random link.\n\n" > random_page.md

  # Three independently shuffled copies of the article list.
  ls *.md | sort -R > _tmp.txt
  ls *.md | sort -R >> _tmp.txt
  ls *.md | sort -R >> _tmp.txt

  # Every file name becomes a "[#](name)" link, all of them on a single line.
  sed "s/^\(.*\)$/[#](\1)/g" _tmp.txt | tr "\n" " " >> random_page.md
fi

echo "converting to other formats"

# Every run starts with fresh, empty output directories.
for OUT_DIR in html txt full; do
  rm -rf "$OUT_DIR" 2> /dev/null
  mkdir "$OUT_DIR"
done

# The archive of a previous run (if there is one) goes away as well.
rm lrs_wiki.7z 2> /dev/null

# Static files of the HTML version are copied over just as they are.
cp report.html pimp_my_lrs.html *.css html/

# Number of articles, shown in the navigation bar of every page.
FILECOUNT=$(ls *.md | wc -l | sed "s/  *//g")

# Name (without extension) of the autogenerated page listing all files.
FILELIST="wiki_pages"

# Each HTML page is put together as: HEADER1 + title + HEADER2 + body + FOOTER.
HEADER1='<html><head><link rel="stylesheet" href="style.css"><title> LRS Wiki: '

# HEADER2 closes the head and holds the navigation bar, one link per line here.
HEADER2='</title></head><body><h1>less_retarded_wiki</h1><span class="nav">'
HEADER2="$HEADER2"'<a href="main.html">main page</a>, '
HEADER2="$HEADER2"'<a class="notdead" href="'"$FILELIST"'.html">file list ('"$FILECOUNT"')</a>, '
HEADER2="$HEADER2"'<a class="notdead" href="https://git.coom.tech/drummyfish/less_retarded_wiki/archive/master.zip">source</a>, '
HEADER2="$HEADER2"'<a class="notdead" href="lrs_wiki.7z">all in md+txt+html+pdf</a>, '
HEADER2="$HEADER2"'<a class="notdead" href="https://git.coom.tech/drummyfish/less_retarded_wiki.atom">commit RSS feed</a>, '
HEADER2="$HEADER2"'<a class="notdead" href="report.html">report abuse</a>, '
HEADER2="$HEADER2"'<a class="notdead" href="wiki_stats.html">stats</a>, '
HEADER2="$HEADER2"'<a class="notdead" href="random_page.html">random article</a>, '
HEADER2="$HEADER2"'<a class="notdead" id="fancylink" href="pimp_my_lrs.html?p=main.html&s=style_fancy.css">consoomer version</a>'
HEADER2="$HEADER2"'</span><hr />'

FOOTER='<hr /><p> Powered by nothing. All content available under <a class="notdead" href="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0</a> (public domain). Send comments and corrections to drummyfish at disroot dot org. </p></body></html>'

# The file list page: heading first, then links to all the .md files (the
# page itself included, it exists by then) separated by " --- ".
printf "# Wiki Files\n\nThis is an autogenerated page listing all pages.\n\n" > "$FILELIST.md"

ls *.md \
  | sed 's/^\(.*\).md$/[\1](\1.md)/g' \
  | tr "\n" " " \
  | sed 's/) \[/\) --- \[/g' >> "$FILELIST.md"

# Opening parts of the single-file versions, articles get appended later.
printf '%s\n' '<html><head><link rel="stylesheet" href="style.css"><title>LRS Wiki</title></head><body><h1>less_retarded_wiki</h1><p>by drummyfish, generated on '"$(date +"%D")"', available under <a href="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0</a> (public domain) </p><hr />' > lrs_wiki.html

printf '%s\n' 'Less retarded wiki (TTS version), by drummyfish, released under CC0 1.0, public domain.' > lrs_wiki_tts.txt

{
  printf '%s\n' 'LESS RETARDED WIKI'
  printf '%s\n' 'by drummyfish, released under CC0 1.0, public domain'
} > lrs_wiki.txt

# Convert every article: it gets its own html/NAME.html and txt/NAME.txt and
# is also appended to each of the single-file versions (lrs_wiki.html,
# lrs_wiki.txt and lrs_wiki_tts.txt).
for f in *.md; do
  echo "$f"

  FNAME=${f%.md} # article name: the file name with just the .md suffix cut off
  F2="html/${FNAME}.html"

  # MD_CMD is left unquoted on purpose, it holds the converter AND its flags.
  $MD_CMD "$f" > _tmp.html

  # Standalone HTML page; links to .md files are redirected to .html ones.
  printf '%s\n' "$HEADER1 $FNAME $HEADER2" > "$F2"
  sed "s/\.md\"/.html\"/g" _tmp.html >> "$F2"
  printf '%s\n' "$FOOTER" >> "$F2"

  # Single-file HTML: an anchor named after the article, then the article
  # with links to other articles turned into links to their anchors.
  echo "<span class=\"article\" id=\"$FNAME\">$FNAME</span><br />" >> lrs_wiki.html
  sed "s/href=\"\([^\"]*\)\.md\"/href=\"#\1\"/g" _tmp.html >> lrs_wiki.html
  echo "<hr />" >> lrs_wiki.html

  # Wrap the converted fragment into a whole HTML document for links.
  cp _tmp.html _tmp2.html
  echo "<html><head></head><body>" > _tmp.html
  cat _tmp2.html >> _tmp.html
  echo "</body></html>" >> _tmp.html

  # Plain text version; the sed shortens the absolute file:// URLs of links.
  links -html-numbered-links 0 -dump _tmp.html | sed "s/file:\/\/.*less_retarded_wiki\///g" >> "txt/$FNAME.txt"

  # printf instead of echo "...\n...": whether echo expands "\n" differs
  # between sh implementations, printf always does.
  printf '%s\n%s:\n' "--------------------------------------------------------------------------------" "$f" >> lrs_wiki.txt
  cat "txt/$FNAME.txt" >> lrs_wiki.txt

  # Text-to-speech version: markup is replaced by spoken cues (headings, list
  # items, comments, code, tables), lines of mostly punctuation become "..."
  # and empty or repeated lines are dropped.
  printf '\nFILE %s:\n\n' "$f" >> lrs_wiki_tts.txt
  cat "$f" | \
    sed "s/^\#\#\# /HEADING 3: /g" | \
    sed "s/^\#\# /HEADING 2: /g" | \
    sed "s/^\# /HEADING 1: /g" | \
    sed "s/^ *- /LIST ITEM: /g" | \
    sed "s/\*\*\([^*]*\)\*\*/\1/g" | \
    sed "s/\*\([^*]*\)\*/\1/g" | \
    sed "s/\[\([^]]*\)\]([^)]*)/\1/g" | \
    sed "s/^ *\([0-9][0-9]*\)\. /ITEM \1: /g" | \
    sed "s/{\([^{]*\)}/; COMMENT: \1; END OF COMMENT;/g" | \
    sed "s/^\`\`\`[^\`]*$/CODE SEGMENT/g" | \
    sed -E "s/^([^a-zA-Z0-9]{4,}|.*[- |'.,;:!^\/\\<>_{}]{8,}.*[- |'.,;:!^\/\\<>_{}]{4,}.*)$/.../g" | \
    sed "s/^ *|.*| *$/TABLE/g" | \
    grep "..*" | uniq >> lrs_wiki_tts.txt
done

echo "</body></html>" >> lrs_wiki.html

if true; then # can be quickly disabled here
  if ! test -f mark_dead_links; then
    echo "compiling helper program"
    cc -O3 mark_dead_links.c -o mark_dead_links
  fi

  # Pages are only touched when the helper really exists and html/ is there.
  # Otherwise every page would be replaced by the (empty) output of a failed
  # command, or files of a wrong directory would get rewritten.
  if test -x mark_dead_links && test -d html; then
    echo "marking dead links"

    # Run in a subshell, so that the directory change can never leak into
    # the rest of the script. The helper is run from inside html/ --
    # NOTE(review): presumably it resolves link targets relative to the
    # current directory; confirm against mark_dead_links.c.
    (
      cd html || exit 1

      for f in *.html; do
        test -f "$f" || continue # the glob didn't match anything

        ../mark_dead_links < "$f" > tmp
        mv tmp "$f"
      done
    )
  else
    echo "WARNING: mark_dead_links helper or html directory missing, not marking dead links." >&2
  fi

  # The helper is rebuilt on every run; -f as it may not have been built.
  rm -f mark_dead_links
fi

# Landing page: html/index.html only holds a link to the main page. The inner
# quotes have to be escaped, otherwise they end the shell string and the href
# value gets written without its quotes.
echo "$HEADER1 redirect $HEADER2<a href=\"main.html\">Go to main page.</a>$FOOTER" >> html/index.html

# NOTE(review): the target is html/$FILELIST WITHOUT the .html extension, so
# this creates a separate file rather than extending the file list page, and
# nothing here opens a matching <ul>. Looks like a leftover -- confirm and
# remove.
echo "</ul> $FOOTER" >> "html/$FILELIST"

echo "making pdf"

# htmldoc is used here rather than pandoc as it seems to be more KISS and to
# give a smaller and nicer pdf. The flags may need tweaking on your system.

PDFFONT="NotoSansMono"

# The data directory is handed over to htmldoc only if it is there. The
# variable ends up holding either the whole "--datadir PATH" option or
# nothing, and is expanded unquoted below so that it splits into two
# arguments (or disappears altogether).
HTMLDOC_DATADIR="/usr/local/share/htmldoc"

if ! test -e "$HTMLDOC_DATADIR"; then
  HTMLDOC_DATADIR=""
else
  HTMLDOC_DATADIR="--datadir $HTMLDOC_DATADIR"
fi

htmldoc $HTMLDOC_DATADIR \
  --bodyfont sans --fontsize 10 --gray \
  --left 8mm --right 8mm --top 8mm --bottom 8mm \
  --continuous --header ... --header1 ... --footer ct/ \
  -t pdf --charset utf-8 -f lrs_wiki.pdf lrs_wiki.html

# The single-file versions of the wiki all go to full/.
for WHOLE_WIKI in lrs_wiki.html lrs_wiki.txt lrs_wiki_tts.txt lrs_wiki.pdf; do
  mv "$WHOLE_WIKI" full/
done

# Temporary files aren't needed anymore; some may not exist, which is fine.
for TMP_FILE in _tmp.html _tmp2.html _tmp.txt _tmp2.txt; do
  rm "$TMP_FILE" 2> /dev/null
done

echo "compressing"

# One archive with everything: sources, scripts and all generated formats.
7z a -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on lrs_wiki.7z \
  *.md *.sh *.html *.css \
  html txt full

echo "done"
