#!/usr/bin/env perl
use strict;
use warnings;
use autodie;
use v5.40;
use builtin ':5.40';
use File::Copy;
use File::Find;
use File::Path qw(make_path remove_tree);

# Site-wide settings, interpolated into the generated pages and feeds.
# Absolute site root; feed and entry URLs are built by appending to it.
my $BASE_URL = 'https://ieks.cc/';
# Site name, used for the home link on every page and as the feed title.
my $TITLE = 'ieks.cc';
# Author name and e-mail emitted in the Atom feed's <author> element.
my $AUTHOR = 'Lucas Ieks';
my $EMAIL = 'lucas@ieks.cc';

# Stylesheet inlined into the <style> element of every HTML page written by
# www(). The heredoc is single-quoted, so nothing inside it is interpolated.
my $STYLE = <<'STYLE_EOF';
@font-face {
	font-family: "Fira Code";
	src: url("/fonts/FiraCode-VF.woff2") format("woff2"),
		url("/fonts/FiraCode-VF.woff") format("woff");
	font-weight: 300 700;
	font-style: normal;
	font-display: swap;
}
@font-face {
	font-family: "Linux Libertine";
	src: url("/fonts/LinLibertine_R.woff") format("woff");
	font-weight: 400;
	font-style: normal;
	font-display: swap;
}
@font-face {
	font-family: "Linux Libertine";
	src: url("/fonts/LinLibertine_RI.woff") format("woff");
	font-weight: 400;
	font-style: italic;
	font-display: swap;
}
@font-face {
	font-family: "Linux Libertine";
	src: url("/fonts/LinLibertine_RB.woff") format("woff");
	font-weight: 700;
	font-style: normal;
	font-display: swap;
}
@font-face {
	font-family: "Linux Libertine";
	src: url("/fonts/LinLibertine_RBI.woff") format("woff");
	font-weight: 700;
	font-style: italic;
	font-display: swap;
}
:root { color-scheme: light dark; }
html {
	font-size: max(14pt, 1rem);
	font-family: "Linux Libertine", "Libertinus Serif", serif;
	font-size-adjust: ex-height 0.5;
	background: #181408;
	color: #eeeeee;
}
body { max-width: min(70ch, 100%); margin: 0 auto 25vh }
header, main, footer { margin: 1rem }
a { color: lightblue }
@media (prefers-color-scheme: light) {
	html { background: #eeeeee; color: #080808 }
	a { color: darkblue }
}
@media print {
	nav, header, footer { display: none }
}
a { text-decoration: none }
a:hover { text-decoration: underline dashed }
h1 a, h2 a, h3 a, sup a { color: inherit }
nav, h1, h2, h3 { font-variant-caps: small-caps; text-wrap: balance }
nav h1 { font-style: italic; letter-spacing: -0.1em }
h1, h2, h3 { margin: 1rem 0; line-height: 1; text-align: center }
nav { margin: 1rem; padding-left: 0 }
nav h1 { text-align: left; font-variant-caps: normal }
nav ol { list-style-type: none; padding: 0; margin: 0 1rem }
nav a::after { content: "\a0\2192" }
small { font-size: 0.75em }
ul { margin: 1rem; padding: 0; list-style: inside square }
h1 { font-size: 2.618rem }
h2 { font-size: 1.618rem }
h3 { font-size: 1rem }
p, div, li, blockquote {
	line-height: 1.5;
	text-align: justify;
	hyphens: auto;
}
p { margin: 1rem 0 }
h1 + p:not(.links)::first-line { font-variant-caps: small-caps }
code { font-family: "Fira Code", monospace }
img { margin: 1rem; width: calc(100% - 2rem) }
blockquote { margin: 0 1rem; font-style: italic }
cite { font-style: italic }
pre { line-height: 1.25; margin: 1rem }
sup { line-height: 0; font-size: 0.618em }
footer p { font-size: 0.618rem }
code > .comment { color: #8fe }
code > .keyword, code > .operator { color: #999 }
code > .type { color: #f7f }
code > .literal { color: #0bf }
code > .string { color: #dd2 }
code > .function { color: #4e4 }
@media (prefers-color-scheme: light) {
	code > .comment { color: #08a }
	code > .keyword, code > .operator { color: #555 }
	code > .type { color: #a06 }
	code > .literal { color: #03c }
	code > .string { color: #e70 }
	code > .function { color: #090 }
}
STYLE_EOF

# Escapes the four characters that are unsafe in HTML text and in
# double-quoted attribute values: & < > and ".
#
# $input - text to escape.
#
# Returns the escaped copy; the caller's value is left untouched.
sub html_esc($input) {
	my %entity_for = (
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		'"' => '&quot;',
	);
	# One pass is enough: every special character is replaced exactly once,
	# so the ampersands of freshly inserted entities are never re-escaped.
	$input =~ s/([&<>"])/$entity_for{$1}/g;
	return $input;
}

# Escapes running text for HTML and applies typographic substitutions:
# "---" and "--" become em/en dash entities, "->" becomes a right-arrow
# entity, straight double quotes become curly ones where their direction can
# be guessed (remaining ones become &quot;), and apostrophes become curly.
#
# $input - one line of prose.
#
# Returns the converted copy.
sub html_sub($input) {
	# Order matters: ampersands first (so inserted entities survive), the
	# arrow before ">" is escaped, and directional quotes before the
	# catch-all &quot; replacement.
	return $input
		=~ s/&/&amp;/gr
		=~ s/---/&mdash;/gr
		=~ s/--/&ndash;/gr
		=~ s/->/&rightarrow;/gr
		=~ s/</&lt;/gr
		=~ s/>/&gt;/gr
		=~ s/(^| )"/$1“/gr
		=~ s/"([ .,;:[)]|$)/”$1/gr
		=~ s/"/&quot;/gr
		=~ s/'/’/gr;
}

# Syntax-highlights the contents of a fenced code block for HTML output.
#
# $lang  - language tag from the opening fence. "css", "sh", "gmi", "tide",
#          "go" and "perl" are highlighted; any other value only gets the
#          text HTML-escaped.
# $input - raw text between the fences.
#
# Returns HTML in which recognised tokens are wrapped in
# <span class="comment|keyword|operator|type|literal|string|function">.
# Every token is escaped via html_esc(); for the languages whose token set
# does not already cover them, stray "&", "<" and ">" are escaped by a final
# catch-all alternative so no raw markup leaks into the page.
# Dies if a pattern alternative matches but has no replacement arm.
sub code($lang, $input) {
	# Wraps $contents (HTML-escaped) in a span carrying the CSS class $class.
	my sub span ($class, $contents) {
		return qq(<span class="$class">@{[html_esc($contents)]}</span>);
	}
	if ($lang eq "css") {
		$input =~ s`
			(//.*$)
			| ([-\w]+)(?=\s*:)
			| ([{}:;()\[\]])
			| ([&<>])
		`
			defined $1 ? span("comment", $1) :
			defined $2 ? span("keyword", $2) :
			defined $3 ? span("operator", $3) :
			defined $4 ? html_esc($4) :
			die("missing replacement for $&")
		`mxge;
	} elsif ($lang eq "sh") {
		$input =~ s`
			(\#.*$)
			| (\$(?:\@
			     | \w+
			     | \{.*?\}))
			| (?<=(?:^|[|;&])\s{,32}(?:\S{1,32}/)?)([^\s/]+)(?=\s|$)
			| ([&<>])
		`
			defined $1 ? span("comment", $1) :
			defined $2 ? span("literal", $2) :
			defined $3 ? span("function", $3) :
			defined $4 ? html_esc($4) :
			die("missing replacement for $&")
		`mxge;
	} elsif ($lang eq "gmi") {
		$input =~ s`
			(\#.*$)
			| (\*|>|=>\s*\S+)
			| (\[[a-z]+\])
			| ([&<>])
		`
			defined $1 ? span("comment", $1) :
			defined $2 ? span("keyword", $2) :
			defined $3 ? span("keyword", $3) :
			defined $4 ? html_esc($4) :
			die("missing replacement for $&")
		`mxge;
	} elsif ($lang eq "tide") {
		$input =~ s`
			(?<comment>//.*$|/\*(.|\n)*?\*/)
			| (?<operator>[(){}\[\],.:;|]
			  | [!=<>+\-*/%]=?
			  | \b(and|or|is|in)\b)
			| \b(?<keyword>break|else|fn|for|if|import
			    | label|let|mut|pub|record|return|switch|while)\b
			| \b(?<literal>\b(false|true|\d+))\b
			| (?<string>"([^\\"]|\\.)*")
			| \b(?<type>bool|int|string|void
			    | ([a-z_]\w+\s*:)? [A-Z]\w*)\b
			| \b(?<function>([a-z_]\w+\s*:)? [a-z]\w*)(?=\s*\()
			| \b(?<variable>([a-z_]\w+\s*:)? [a-z]\w*)(?!\s*\()
			| (?<other>[&<>])
		`
			defined $+{comment} ? span("comment", $+{comment}) :
			defined $+{operator} ? span("operator", $+{operator}) :
			defined $+{keyword} ? span("keyword", $+{keyword}) :
			defined $+{literal} ? span("literal", $+{literal}) :
			defined $+{string} ? span("string", $+{string}) :
			defined $+{type} ? span("type", $+{type}) :
			defined $+{function} ? span("function", $+{function}) :
			defined $+{variable} ? html_esc($+{variable}) :
			defined $+{other} ? html_esc($+{other}) :
			die("missing replacement for $&")
		`mnxge;
	} elsif ($lang eq "go") {
		# Strings: each quoted form stops at its own closing delimiter, so
		# several literals on one line are highlighted separately; interpreted
		# strings and runes never cross a newline, raw strings may.
		$input =~ s`
			(?<comment>//.*$|/\*(.|\n)*?\*/)
			| (?<operator>[(){}\[\],.~;]
			  | [!=<>+\-*/%&|^:]=?
			  | (<<|>>|&^)=?
			  | &&|\|\||\+\+|--|<-)
			| \b(?<keyword>break|case|const|continue|default
			    | defer|else|fallthrough|for|^func|go|goto
			    | if|import|package|range|return
			    | select|switch|type|var)\b
			| (?<literal>0[bB][01_]+|0[oO][0-8_]+
			  | 0[xX][\dA-Fa-f_]+
			  | \b\d+(\.\d*([eE][-+]?\d+)?)?\b
			  | \.\d+([eE][-+]?\d+)?\b)
			| (?<string>"([^\\"\n]|\\.)*"
			  | '([^\\'\n]|\\.)+'
			  | \`[^\`]*\`)
			| \b(?<type>u?int(8|16|32|64)?|uintptr
			    | float(32|64)|complex(64|128)
			    | byte|rune|string
			    | struct|map|interface|chan|func)\b
			| \b(?<function>[A-Za-z_]\w*)(?=\s*\()
			| \b(?<variable>[A-Za-z_]\w*)(?!\s*\()
		`
			defined $+{comment} ? span("comment", $+{comment}) :
			defined $+{operator} ? span("operator", $+{operator}) :
			defined $+{keyword} ? span("keyword", $+{keyword}) :
			defined $+{literal} ? span("literal", $+{literal}) :
			defined $+{string} ? span("string", $+{string}) :
			defined $+{type} ? span("type", $+{type}) :
			defined $+{function} ? span("function", $+{function}) :
			defined $+{variable} ? html_esc($+{variable}) :
			die("missing replacement for $&")
		`mnxge;
	} elsif ($lang eq "perl") {
		# this is hacky and incomplete
		# /s is in effect (regex and string tokens may span lines), so the
		# comment alternative must stop at the newline explicitly.
		$input =~ s`
			(?<comment>\#[^\n]*$)
			| (?<variable>[\$\@\%]([+&]|[a-z]\b))(?!\s*\()
			| (?<operator>[(){}\[\],.:;|]
			  | [!=<>+\-*/\%&|~?]+
			  | \bdefined\b)
			| (?<keyword>\b(my|sub)\b)
			| (?<regex>\b[sr]?\`.*?\`|\`[mnxges]*\b)
			| (?<string>"([^\\"]|\\.)*"|(?<=\{)[a-z_]+(?=\}))
			| \b(?<function>\b[a-z]\w*(?=\s*\()|(?<=\bsub)\s+[a-z]\w*\b)
		`
			defined $+{comment} ? span("comment", $+{comment}) :
			defined $+{variable} ? html_esc($+{variable}) :
			defined $+{operator} ? span("operator", $+{operator}) :
			defined $+{keyword} ? span("keyword", $+{keyword}) :
			defined $+{regex} ? span("literal", $+{regex}) :
			defined $+{string} ? span("string", $+{string}) :
			defined $+{function} ? span("function", $+{function}) :
			die("missing replacement for $&")
		`mnxges;
	} else {
		$input = html_esc($input);
	}
	return $input;
}

# Converts one line of gemtext prose to inline HTML.
#
# $header - id of the enclosing heading (e.g. "1.2"); used as the prefix of
#           footnote anchors so the same note name can recur per section.
# $input  - the line, without its gemtext line marker.
#
# Returns:
#   - <small id="HEADER.name">...</small> when the line starts with "[name]"
#     (a footnote definition),
#   - <cite>...</cite> when the line starts with "---" (an attribution),
#   - otherwise the converted text, with every "[name]" turned into a
#     superscript link to "#HEADER.name".
sub body($header, $input) {
	if (my ($note) = $input =~ /^\[([a-z]+)\]/) {
		my $text = html_sub($input);
		return qq(<small id="$header.$note">$text</small>);
	}

	if ($input =~ /^---/) {
		my $text = html_sub($input);
		return qq(<cite>$text</cite>);
	}

	my $html = html_sub($input);
	$html =~ s|\[([a-z]+)\]|<sup><a href="#$header.$1">$&</a></sup>|g;
	return $html;
}

# Writes the Gemini edition of one document to .out/gmi/$name.
#
# $name    - source path relative to the site root (ends in ".gmi").
# $in      - filehandle on the source, positioned at its start.
# $written - publication date string, or undef/empty for unpublished pages;
#            the text before the first "-" of "Y-M-D" is used as the year.
# $revised - revision date string in the same format, or undef.
#
# The output starts with a link to the site root and an "up" link (to the
# directory's index for ordinary pages, to the parent directory for an index
# below the root, none for the root index), followed by the source text with
# typographic substitutions applied outside ``` preformatted blocks, and a
# copyright notice when $written is set.
# Reads $File::Find::dir, so it must be called from a File::Find callback.
# I/O failures raise exceptions through autodie.
sub gmi($name, $in, $written, $revised) {
	print "gmi $name\n";
	open(my $gmi, '>', ".out/gmi/$name");
	print $gmi "=> / $TITLE\n";
	# Match "index.gmi" at the root as well as in subdirectories; otherwise
	# the root index would get an "up" link pointing at itself.
	if ($name !~ m{(?:^|/)index\.gmi$}) {
		print $gmi "=> index.gmi up\n";
	} elsif ($File::Find::dir ne '.') {
		print $gmi "=> ../ up\n";
	}
	print $gmi "\n";
	my $literal = false;
	while (my $line = <$in>) {
		# A fence line toggles preformatted mode; text inside is kept verbatim.
		$literal = !$literal if $line =~ /^```/;
		unless ($literal) {
			$line =~ s/---/—/g;
			$line =~ s/--/–/g;
			$line =~ s/->/→/g;
			$line =~ s/(^| )"/$1“/g;
			$line =~ s/"([ .,;:[)]|$)/”$1/g;
			$line =~ s/'/’/g;
		}
		print $gmi $line;
	}
	if ($written) {
		my $first_year = $written =~ s/-.*-.*$//r;
		my $last_year = defined($revised) ? $revised =~ s/-.*-.*$//r : $first_year;
		print $gmi "\nCopyright © " . $first_year;
		print $gmi "–" . $last_year if $last_year ne $first_year;
		print $gmi " by Lucas Ieks.\n";
		print $gmi "Licensed under CC-BY-SA 4.0, unless stated otherwise.\n";
		print $gmi "Some rights reserved.\n";
	}
	# Close explicitly so autodie reports buffered write errors.
	close($gmi);
}

# Writes the HTML edition of one gemtext document to .out/www/, with the
# ".gmi" suffix of $name replaced by ".html".
#
# $name     - source path relative to the site root (ends in ".gmi").
# $in       - seekable filehandle on the source, positioned at its start; it
#             is read twice (table of contents, then content).
# $written  - publication date string, or undef/empty for unpublished pages;
#             the text before the first "-" of "Y-M-D" is used as the year.
# $revised  - revision date string in the same format, or undef.
# $language - value for the <body lang> attribute.
# $title    - page title; when defined, everything before the first "#"
#             heading is wrapped in <header>. Falls back to the site title
#             for the <title> element when undefined.
#
# Page layout: <nav> (home link, "up" link, nested list of ## and ###
# headings), optional <header>, <main>, and a copyright <footer> when
# $written is set. Heading ids are "H1", "H1.H2" and "H1.H2.H3" counters.
# Reads $File::Find::dir, so it must be called from a File::Find callback.
# I/O failures raise exceptions through autodie.
sub www($name, $in, $written, $revised, $language, $title) {
	print "www $name\n";
	open(my $www, '>', ".out/www/@{[$name =~ s/\.gmi$/.html/r]}");
	print $www qq(<!DOCTYPE html><html><head><meta charset="utf-8">);
	print $www qq(<meta name="viewport" content="width=device-width,initial-scale=1">\n);
	print $www qq(<link rel="alternate" type="application/atom+xml" title="$TITLE" href="${BASE_URL}atom.xml">\n);
	# The title is document text, so it is escaped like a heading.
	print $www "<title>@{[html_sub($title // $TITLE)]}</title>\n";
	print $www "<style>\n$STYLE</style><body lang=\"@{[html_esc($language)]}\">";
	print $www "<nav>";
	print $www qq(<h1><a href="/">$TITLE</a></h1>\n);
	# Match "index.gmi" at the root as well as in subdirectories; otherwise
	# the root index would get an "up" link pointing at itself.
	if ($name !~ m{(?:^|/)index\.gmi$}) {
		print $www qq(<a href="index.html">up</a>\n);
	} elsif ($File::Find::dir ne '.') {
		print $www qq(<a href="../">up</a>\n);
	}

	# First pass: table of contents built from ## and ### headings.
	my ($h1, $h2, $h3) = (0, 0, 0);
	print $www "<ol>\n";
	while (<$in>) {
		if (/^```/) {
			# Skip preformatted text so "#" inside code is not a heading.
			while (defined($_ = <$in>) && !/^```/) {}
		} elsif (/^###/) {
			s/^###\s*(.*)\n?$/$1/;
			print $www "<li><ol>\n" if $h3 == 0;
			$h3++;
			print $www qq(<li><a href="#$h1.$h2.$h3">@{[html_sub($_)]}</a></li>\n);
		} elsif (/^##/) {
			s/^##\s*(.*)\n?$/$1/;
			# Close an open ### list before opening or continuing the ##
			# list, so the lists stay properly nested even when a ###
			# heading precedes the first ## heading.
			if ($h3 > 0) {
				print $www "</ol></li>\n";
				$h3 = 0;
			}
			print $www "<li><ol>\n" if $h2 == 0;
			$h2++;
			print $www qq(<li><a href="#$h1.$h2">@{[html_sub($_)]}</a></li>\n);
		} elsif (/^#/) {
			s/^#\s*(.*)\n?$/$1/;
			$h1++;
			print $www "</ol></li>\n" if $h3 > 0;
			print $www "</ol></li>\n" if $h2 > 0;
			($h3, $h2) = (0, 0);
		}
	}
	print $www "</ol></li>\n" if $h3 > 0;
	print $www "</ol></li>\n" if $h2 > 0;
	print $www "</ol></nav>";

	# Track whether <header> is still open so it is always closed exactly
	# once, including in documents that never reach a "#" heading.
	my $in_header = defined $title;
	print $www ($in_header ? "<header>\n" : "<main>\n");

	# Second pass: content. Each branch leaves the next unprocessed line in
	# $_ (or undef at end of file).
	seek($in, 0, 0);
	($h1, $h2, $h3) = (0, 0, 0);
	my $header = '';
	$_ = <$in>;
	while (defined $_) {
		if (/^\s*$/) {
			while (defined($_ = <$in>) && /^\s*$/) {}
		} elsif (/^```/) {
			my $lang = s/^```\s*(.*)\n?$/$1/r;
			my $code = "";
			$code .= $_ while defined($_ = <$in>) && !/^```/;
			print $www "<pre><code>@{[code($lang, $code)]}</code></pre>\n";
			last unless defined($_ = <$in>);
		} elsif (/^\*/) {
			print $www "<ul>\n";
			do {
				s/^\*\s*(.*)\n?$/$1/;
				print $www "<li>@{[body($header, $_)]}</li>\n";
			} while (defined($_ = <$in>) && /^\*/);
			print $www "</ul>\n";
		} elsif (/^###/) {
			s/^###\s*(.*)\n?$/$1/;
			$h3++;
			$header = "$h1.$h2.$h3";
			print $www qq(<h3 id="$header"><a href="#$header">@{[body($header, $_)]}</a></h3>\n);
			last unless defined($_ = <$in>);
		} elsif (/^##/) {
			s/^##\s*(.*)\n?$/$1/;
			$h2++;
			$h3 = 0;
			$header = "$h1.$h2";
			print $www qq(<h2 id="$header"><a href="#$header">@{[body($header, $_)]}</a></h2>\n);
			last unless defined($_ = <$in>);
		} elsif (/^#/) {
			s/^#\s*(.*)\n?$/$1/;
			if ($in_header) {
				print $www "</header><main>\n";
				$in_header = false;
			}
			$h1++;
			($h3, $h2) = (0, 0);
			$header = "$h1";
			print $www qq(<h1 id="$header"><a href="#$header">@{[body($header, $_)]}</a></h1>\n);
			last unless defined($_ = <$in>);
		} elsif (/^>/) {
			s/^>\s*(.*)\n?$/$1/;
			print $www "<blockquote>@{[body($header, $_)]}</blockquote>\n";
			last unless defined($_ = <$in>);
		} else {
			# Paragraph: consecutive text and link lines joined with <br>.
			if (/^=>/) {
				print $www qq(<p class="links">);
			} else {
				print $www "<p>";
			}
			while (1) {
				if (/^=>/) {
					# The label is optional in gemtext.
					my ($href, $label) = /^=>\s*(\S+)(?:\s+(.*))?$/;
					$href //= '';
					$href =~ s/\.gmi$/.html/ unless $href =~ m|^gemini://|;
					my $url = html_esc($href);
					my $has_label = defined $label && $label ne '';
					if ($href =~ /\.(jpg|png)$/) {
						# Attribute text: escape only, never inject markup.
						my $alt = $has_label ? html_sub($label) : '';
						print $www qq(<a href="$url"><img src="$url" alt="$alt"></a>\n);
					} else {
						# An unlabelled link shows its URL.
						my $text = $has_label ? body($header, $label) : $url;
						print $www qq(<a href="$url">$text</a>\n);
					}
				} else {
					s/\n$//s;
					print $www body($header, $_);
				}
				last unless defined($_ = <$in>) && !/^\s*$|^#|^>|^\*|^```/;
				print $www "<br>\n";
			}
			print $www "</p>\n";
		}
	}
	print $www "</header><main>\n" if $in_header;
	print $www "</main>";
	if ($written) {
		my $first_year = $written =~ s/-.*-.*$//r;
		my $last_year = defined($revised) ? $revised =~ s/-.*-.*$//r : $first_year;
		print $www "<footer><p>";
		print $www "Copyright © $first_year";
		print $www "–" . $last_year if $last_year ne $first_year;
		print $www " by Lucas Ieks.<br>";
		print $www "Licensed under CC-BY-SA 4.0, unless stated otherwise.<br>";
		print $www "Some rights reserved.</p></footer>";
	}
	print $www "\n";
	# Close explicitly so autodie reports buffered write errors.
	close($www);
}

# Pages that carry a "Written:" date, keyed by source path. Each value is
# [written, revised (or written when never revised), language, title];
# filled in by process() and consumed when the feeds are generated.
my %published;
# Greatest revised/written date string seen among published pages
# (compared as strings); stays "" when nothing is published.
my $latest = "";

# File::Find callback: handles one path found under the site root.
#
# Expects $_ to hold the path as File::Find sets it with no_chdir
# ("./dir/file"), and reads $File::Find::dir. Hidden files and anything
# inside a hidden directory are skipped, as are non-regular files.
#
#   *.gmi             - scanned for "Written:", "Revised:" and "Language:"
#                       lines up to the first "#" heading (the title), then
#                       rendered by gmi() and www(). Pages without a
#                       "Written:" date are skipped unless they are an index
#                       or 404 page. Dated pages are recorded in %published
#                       and may advance $latest.
#   *.html/css/js/woff/woff2
#                     - copied to the HTML tree only.
#   everything else   - copied to both output trees.
#
# Dies when a file cannot be opened, created or copied.
sub process() {
	return if /\/\./;
	return unless -f;
	s|^\./||;
	if (/\.gmi$/) {
		my $name = $_;
		open(my $in, "<", $name);
		my ($written, $revised);
		my ($language, $title) = ('en', $TITLE);
		while (my $line = <$in>) {
			if ($line =~ /^Written:\s*/) {
				$written = $line =~ s/^Written:\s*(.*)\n?$/$1/r;
			} elsif ($line =~ /^Revised:\s*/) {
				$revised = $line =~ s/^Revised:\s*(.*)\n?$/$1/r;
			} elsif ($line =~ /^Language:\s*/) {
				$language = $line =~ s/^Language:\s*(.*)\n?$/$1/r;
			} elsif ($line =~ /^#[^#]/) {
				# The first top-level heading is the title and ends the scan.
				$title = $line =~ s/^#\s*(.*)\n?$/$1/r;
				last;
			}
		}
		return unless $written or $name =~ m`(^|/)(index|404)\.gmi$`;
		make_path(".out/gmi/${File::Find::dir}");
		make_path(".out/www/${File::Find::dir}");
		seek($in, 0, 0);
		gmi $name, $in, $written, $revised;
		seek($in, 0, 0);
		www $name, $in, $written, $revised, $language, $title;
		close($in);
		if (defined $written) {
			$published{$name} = [$written, $revised // $written, $language, $title];
			$latest = $revised // $written if ($latest cmp ($revised // $written)) < 0;
		}
	} elsif (/\.(html|css|js|woff2?)$/) {
		print "www copy $_\n";
		make_path(".out/www/${File::Find::dir}");
		# File::Copy is not covered by autodie, so check the result here.
		copy($_, ".out/www/${File::Find::dir}")
			or die "cannot copy $_ to .out/www/${File::Find::dir}: $!\n";
	} else {
		print "copy $_\n";
		make_path(".out/gmi/${File::Find::dir}", ".out/www/${File::Find::dir}");
		for my $tree (".out/gmi", ".out/www") {
			copy($_, "$tree/${File::Find::dir}")
				or die "cannot copy $_ to $tree/${File::Find::dir}: $!\n";
		}
	}
}

# Start from empty output trees (File::Path "safe" mode).
remove_tree(".out/www", ".out/gmi", { safe => true });

# Walk the site root without changing directory; process() handles each path.
my %walk_options = (wanted => \&process, no_chdir => true);
find(\%walk_options, ".");

# Published pages, most recently revised first; equal dates fall back to
# ascending path order so the result is stable.
my @published_sorted = sort {
	my ($date_a, $date_b) = ($published{$a}[1], $published{$b}[1]);
	($date_b cmp $date_a) || ($a cmp $b)
} keys %published;

# Gemini feed: a gemtext page listing every English published page as
# "=> path revised-date title", newest first.
open(my $gmifeed, '>', ".out/gmi/feed.gmi");
print $gmifeed "=> / $TITLE\n\n";
print $gmifeed "# $TITLE\n";
print $gmifeed "\nLast update: $latest\n";
foreach my $name (@published_sorted) {
	my (undef, $revised, $language, $title) = @{$published{$name}};
	next if $language ne "en";
	print "gmi feed $name\n";
	print $gmifeed "=> $name $revised $title\n";
}
# Close explicitly so autodie reports buffered write errors and the file is
# complete before the output trees are archived.
close($gmifeed);

# Atom feed for the HTML site: one <entry> per English published page,
# newest first. Dates carry no time of day, so noon UTC is used throughout.
# Titles and URLs come from document text and file names, so they are
# escaped; html_esc() emits only the entities XML predefines.
open(my $wwwfeed, '>', ".out/www/atom.xml");
print $wwwfeed q(<?xml version="1.0" encoding="utf-8"?>);
print $wwwfeed q(<feed xmlns="http://www.w3.org/2005/Atom">);
print $wwwfeed qq(<id>$BASE_URL</id>);
print $wwwfeed qq(<title>$TITLE</title>);
print $wwwfeed qq(<updated>${latest}T12:00:00Z</updated>);
print $wwwfeed qq(<link href="${BASE_URL}atom.xml" rel="self"/>);
print $wwwfeed qq(<author><name>$AUTHOR</name><email>$EMAIL</email></author>);
print $wwwfeed qq(<generator uri="${BASE_URL}gmiweb.pl">gmiweb.pl</generator>);
foreach my $name (@published_sorted) {
	my ($written, $revised, $language, $title) = @{$published{$name}};
	next if $language ne "en";
	print "www feed $name\n";
	my $url = html_esc($BASE_URL . ($name =~ s/\.gmi$/.html/r));
	print $wwwfeed "<entry>";
	print $wwwfeed "<title>@{[html_esc($title)]}</title>";
	print $wwwfeed qq(<link href="$url"></link>);
	# Advertise the Brazilian Portuguese translation when its source exists
	# next to the English one.
	if (-f ($name =~ s/\.gmi$/-pt-BR.gmi/r)) {
		my $translated = html_esc($BASE_URL . ($name =~ s/\.gmi$/-pt-BR.html/r));
		print $wwwfeed qq(<link rel="alternate" href="$translated" hreflang="pt-BR"></link>);
	}
	print $wwwfeed "<id>$url</id>";
	print $wwwfeed "<published>${written}T12:00:00Z</published>";
	print $wwwfeed "<updated>${revised}T12:00:00Z</updated>";
	print $wwwfeed "</entry>";
}
print $wwwfeed "</feed>";
# Close explicitly so autodie reports buffered write errors and the file is
# complete before the output trees are archived.
close($wwwfeed);

# Pack each output tree into .out/<tree>.tar.gz. system() is not covered by
# the default autodie import, so the exit status is checked by hand.
for my $tree (qw(gmi www)) {
	system("cd .out/$tree && tar -cz * > ../$tree.tar.gz") == 0
		or die "archiving .out/$tree failed (wait status $?)\n";
}
