#! /usr/bin/env bash
#
# Create small and big input files, check that spacecat can parse and
# pretty-print them, then time spacegrep pattern matching on them.
#
set -eu

# Do all the work inside a scratch directory below the current directory.
mkdir -p tmp
cd tmp

# Every path is resolved from the top level of the enclosing git checkout.
project_root="$(git rev-parse --show-toplevel)"
spacegrep_root="${project_root}/libs/spacegrep"

# Executables under test and the directory holding the perf data files.
spacegrep="${spacegrep_root}/bin/spacegrep"
spacecat="${spacegrep_root}/bin/spacecat"
perf_data_dir="${spacegrep_root}/perf-data"

# spacegrep command prefix: the binary followed by its --time option,
# stored as a single space-separated string.
_spacegrep="${spacegrep} --time"

#
# Any content that looks like source code:
#
# Write the OCaml sample to the file "small".
# The here-doc delimiter is quoted so the body is copied verbatim: the sample
# contains '$' characters, and with an unquoted delimiter any future edit that
# puts a name or '(' after a '$' would be expanded by the shell (or abort the
# script under 'set -u').
cat > small <<'EOF'
let rec of_pattern (pat : Pattern_AST.t) : t =
  List.fold_left of_pattern_node [] pat |> List.rev

and of_pattern_node acc pat_node =
  match pat_node with
  | End -> acc
  | Atom (loc, atom) -> (
      match atom with
      | Word s -> Atom (loc, Word s) :: acc
      | Punct c -> Atom (loc, Punct c) :: acc
      | Byte c -> Atom (loc, Byte c) :: acc
      | Metavar s ->
          let start, end_ = loc in
          let word_loc = (Loc.Pos.shift start 1, end_) in
          Atom (word_loc, Word s) :: Atom (Loc.sub loc 0 1, Punct '$') :: acc )
  | Dots (loc, None) ->
      (* ... *)
      let pos0, pos3 = loc in
      let pos1 = Loc.Pos.shift pos0 1 in
      let pos2 = Loc.Pos.shift pos1 1 in
      let loc0 = (pos0, pos1) in
      let loc1 = (pos1, pos2) in
      let loc2 = (pos2, pos3) in
      Atom (loc2, Punct '.')
      :: Atom (loc1, Punct '.')
      :: Atom (loc0, Punct '.')
      :: acc
  | Dots (loc, Some s) ->
      (* $...MVAR *)
      let pos0, pos5 = loc in
      let pos1 = Loc.Pos.shift pos0 1 in
      let pos2 = Loc.Pos.shift pos1 1 in
      let pos3 = Loc.Pos.shift pos2 1 in
      let pos4 = Loc.Pos.shift pos3 1 in
      let loc0 = (pos0, pos1) in
      let loc1 = (pos1, pos2) in
      let loc2 = (pos2, pos3) in
      let loc3 = (pos3, pos4) in
      let loc4 = (pos4, pos5) in
      Atom (loc4, Word s)
      :: Atom (loc3, Punct '.')
      :: Atom (loc2, Punct '.')
      :: Atom (loc1, Punct '.')
      :: Atom (loc0, Punct '$')
      :: acc
  | List pat -> List (of_pattern pat) :: acc
EOF

# Write COUNT back-to-back copies of the file SRC into DST.
# Any existing DST is removed first, so the result never accumulates across
# runs of this script.
#   $1 - source file to copy
#   $2 - destination file
#   $3 - number of copies
repeat_file() {
  local src=$1 dst=$2 count=$3
  local i
  rm -f -- "$dst"
  # The redirection is on the loop, so DST is opened once rather than once
  # per copy.
  for ((i = 0; i < count; i++)); do
    cat "$src"
  done > "$dst"
}

# medium is 50 copies of small; big is 50 copies of medium.
repeat_file small medium 50
repeat_file medium big 50

# Variants of medium and big with every newline replaced by a space.
tr '\n' ' ' < medium > medium.oneline
tr '\n' ' ' < big > big.oneline

# Binary input: 1 MB (1000000 bytes) read from the kernel random source.
head -c 1000000 < /dev/urandom > big-blob

# A big target that we keep under version control, stored gzip-compressed;
# decompress it from stdin into the working directory.
gzip -dc < "${perf_data_dir}/django-template.po.gz" > django-template.po

# Write a progress message to stderr, prefixed with the "[spacecat]" tag.
# All arguments are joined into a single line.
print() {
  printf '%s\n' "[spacecat] $*" 1>&2
}

# Announce a run via print, then time spacecat reading the given file on
# stdin.
#   $1 - label to print
#   $2 - input file
time_spacecat() {
  print "$1"
  time "$spacecat" < "$2"
}

time_spacecat "Parse and print small file" small
time_spacecat "Parse and print big file" big
time_spacecat "Parse and print single-line big file" big.oneline
time_spacecat "Parse and print big binary file" big-blob
time_spacecat "Parse and print big django template" django-template.po

# Redefine print for the second half of the script: write a progress message
# to stderr, prefixed with the "[spacegrep]" tag. All arguments are joined
# into a single line.
print() {
  printf '%s\n' "[spacegrep] $*" 1>&2
}

# Announce a run via print, then time spacegrep invoked with --time followed
# by the remaining arguments.
#   $1    - label to print
#   $2... - arguments forwarded to spacegrep after --time
# The binary is invoked as "$spacegrep" (quoted) rather than through an
# unquoted command string, so a checkout path containing whitespace or glob
# characters still expands to a single command word.
time_spacegrep() {
  local label=$1
  shift
  print "$label"
  time "$spacegrep" --time "$@"
}

time_spacegrep "Run simple pattern on small file" foo small

time_spacegrep "Run simple pattern on big file" foo big

time_spacegrep "Run simple pattern on big file - no pre-match optimization" \
  foo big --no-skip-search

time_spacegrep "Run ellipsis pattern on medium file" '$X ... foo' medium

time_spacegrep "Run ellipsis pattern on medium one-line file" \
  '$X ... foo' medium.oneline --force

time_spacegrep "Run ellipsis pattern on big file" '$X ... foo' big

# The following takes forever because we hit O(n^2) complexity:
# time_spacegrep "Run ellipsis pattern on single-line big file" \
#   '$X ... foo' big.oneline --force

time_spacegrep "Run ellipsis pattern on big binary file" \
  '$X ... foo' big-blob --force

time_spacegrep "Run complex pattern on big django template" \
  -p "$perf_data_dir"/django-template.pat -d django-template.po

time_spacegrep \
  "Run complex pattern on big django template - no pre-match optimization" \
  --no-skip-search \
  -p "$perf_data_dir"/django-template.pat -d django-template.po
