#!/usr/bin/env bash
# Copyright 2010-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://aws.amazon.com/apache2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

# Settings consumed by git-sh-setup (sourced below).
# NONGIT_OK=1 follows the git-sh-setup convention for commands that may run
# outside of a Git repository. OPTIONS_SPEC holds the usage lines (before the
# "--" separator line) followed by one "<option> <help text>" entry per line.
# Entries whose name ends in "*" are the hook entry points, labelled
# "internal only" in their own help text.
NONGIT_OK=1 OPTIONS_SPEC="\
git secrets --scan [-r|--recursive] [--cached] [--no-index] [--untracked] [<files>...]
git secrets --scan-history
git secrets --install [-f|--force] [<target-directory>]
git secrets --list [--global]
git secrets --add [-a|--allowed] [-l|--literal] [--global] <pattern>
git secrets --add-provider [--global] <command> [arguments...]
git secrets --register-aws [--global]
git secrets --aws-provider [<credentials-file>]
--
scan Scans <files> for prohibited patterns
scan-history Scans repo for prohibited patterns
install Installs git hooks for Git repository or Git template directory
list Lists secret patterns
add Adds a prohibited or allowed pattern, ensuring to de-dupe with existing patterns
add-provider Adds a secret provider that when called outputs secret patterns on new lines
aws-provider Secret provider that outputs credentials found in an ini file
register-aws Adds common AWS patterns to the git config and scans for ~/.aws/credentials
r,recursive --scan scans directories recursively
cached --scan scans searches blobs registered in the index file
no-index --scan searches files in the current directory that is not managed by Git
untracked In addition to searching in the tracked files in the working tree, --scan also in untracked files
f,force --install overwrites hooks if the hook already exists
l,literal --add and --add-allowed patterns are escaped so that they are literal
a,allowed --add adds an allowed pattern instead of a prohibited pattern
global Uses the --global git config
commit_msg_hook* commit-msg hook (internal only)
pre_commit_hook* pre-commit hook (internal only)
prepare_commit_msg_hook* prepare-commit-msg hook (internal only)"

# Include the git setup script. This parses and normalizes CLI arguments.
# It is also expected to provide the `die` and `say` helpers used below.
. "$(git --exec-path)"/git-sh-setup

# Prints every prohibited pattern, one per line: first the patterns stored in
# the git config (secrets.patterns), then the output of each configured
# provider command (secrets.providers).
# Outputs: patterns on stdout.
load_patterns() {
  git config --get-all secrets.patterns
  local cmd result
  # Execute each provider and use their output to build up patterns
  git config --get-all secrets.providers | while read -r cmd; do
    # Only split words on '\n\t ' and strip "\r" from the output to account
    # for carriage returns being added on Windows systems. Empty lines are
    # removed as well: an empty pattern would match everything, and command
    # substitution alone only strips *trailing* newlines, not blank lines in
    # the middle of a provider's output.
    result="$(export IFS=$'\n\t '; $cmd | tr -d $'\r' | sed '/^$/d')"
    # A provider that printed nothing useful contributes nothing.
    if [ -n "${result}" ]; then
      printf '%s\n' "${result}"
    fi
  done
}

# Prints every allowed pattern, one per line: first the patterns stored in
# the git config (secrets.allowed), then the non-blank, non-comment lines of
# the .gitallowed file found at the top level of the work tree (if any).
# Outputs: allowed patterns on stdout.
load_allowed() {
  git config --get-all secrets.allowed
  local toplevel gitallowed
  # Without a work tree there is no .gitallowed to read; bail out instead of
  # accidentally probing "/.gitallowed".
  toplevel="$(git rev-parse --show-toplevel)" || return 0
  [ -n "${toplevel}" ] || return 0
  gitallowed="${toplevel}/.gitallowed"
  if [ -e "${gitallowed}" ]; then
    # Quote the path so repositories located under a directory containing
    # whitespace still work. NF skips blank lines; the second test skips
    # lines whose first field starts with "#".
    awk 'NF && $1!~/^#/' "${gitallowed}"
  fi
}

# Loads all prohibited patterns and joins them with "|" into a single
# extended regular expression (needed by commands that accept one pattern).
# Outputs: the combined expression on stdout (an empty line if no patterns).
load_combined_patterns() {
  local patterns pattern combined_patterns=''
  patterns=$(load_patterns)
  # Read line by line instead of relying on word splitting: an unquoted
  # expansion would break patterns containing spaces into several
  # alternatives and would glob-expand patterns such as "[a-z]*" against the
  # files of the current directory.
  while IFS= read -r pattern; do
    [ -n "${pattern}" ] && combined_patterns+="${pattern}|"
  done <<< "${patterns}"
  # Drop the trailing separator added by the last iteration.
  printf '%s\n' "${combined_patterns%|}"
}

# Scans files or a repo using patterns.
# Arguments: the files to scan (may be empty).
# Globals:   SCAN_CACHED, SCAN_UNTRACKED, SCAN_NO_INDEX (read).
# Returns:   the result of process_output (0 = clean, 1 = prohibited match).
scan() {
  local files=("${@}") options="" output
  [ "${SCAN_CACHED}" == 1 ] && options+="--cached"
  [ "${SCAN_UNTRACKED}" == 1 ] && options+=" --untracked"
  [ "${SCAN_NO_INDEX}" == 1 ] && options+=" --no-index"
  # Scan using git-grep if there are no files or if git options are applied.
  if [ ${#files[@]} -eq 0 ] || [ -n "${options}" ]; then
    # git_grep takes ALL options as its first argument and splits them
    # itself, so the string must be passed quoted. Unquoted, a second flag
    # would be shifted into the file list and treated as a pathspec.
    output=$(git_grep "${options}" "${files[@]}")
  else
    output=$(regular_grep "${files[@]}")
  fi
  # $? is still the exit status of whichever grep ran above.
  process_output $? "${output}"
}

# Scans through history using patterns.
# Returns: 0 when there are no patterns or no revision contains a match,
#          otherwise the result of process_output.
scan_history() {
  local combined_patterns to_scan log_status output
  # git log does not support multiple patterns, so we need to combine them
  combined_patterns=$(load_combined_patterns)
  [ -z "${combined_patterns}" ] && return 0
  # Looks for differences matching the patterns, reduces the number of revisions to scan
  to_scan=$(git log --all -G"${combined_patterns}" --pretty=%H)
  log_status=$?
  # Nothing in history matches: stop here. Calling git grep without any
  # revision would silently scan the working tree instead of the history.
  # (If git log itself failed, fall through so git grep reports the problem.)
  if [ "${log_status}" -eq 0 ] && [ -z "${to_scan}" ]; then
    return 0
  fi
  # Scan through revisions with findings to normalize output.
  # -e protects patterns that start with a dash; ${to_scan} is deliberately
  # unquoted so every commit hash becomes its own argument.
  output=$(GREP_OPTIONS= LC_ALL=C git grep -nwHEI -e "${combined_patterns}" ${to_scan})
  process_output $? "${output}"
}

# Performs a git-grep, taking into account patterns and options.
# Arguments: $1 - all git-grep options as a single space separated string
#                 (may be empty); remaining arguments are the files to scan.
# Returns:   the status of git grep: 0 when a prohibited pattern was found,
#            1 when nothing matched (also used when there are no patterns).
git_grep() {
  local options="$1"; shift
  local files=("${@}") combined_patterns
  combined_patterns=$(load_combined_patterns)

  [ -z "${combined_patterns}" ] && return 1
  # ${options} is intentionally unquoted so each flag becomes its own word.
  # -e keeps patterns that begin with a dash (e.g. "-----BEGIN") from being
  # parsed as command line options.
  GREP_OPTIONS= LC_ALL=C git grep -nwHEI ${options} -e "${combined_patterns}" -- "${files[@]}"
}

# Performs a regular grep, taking into account patterns and recursion.
# Arguments: the files (or "-" for stdin) to scan.
# Globals:   RECURSIVE (read) - 1 to descend into directories.
# Returns:   the status of grep: 0 when a prohibited pattern was found,
#            1 when nothing matched (also used when there are no patterns).
regular_grep() {
  local files=("${@}") patterns action='skip'
  patterns=$(load_patterns)
  [ -z "${patterns}" ] && return 1
  [ "${RECURSIVE}" == 1 ] && action="recurse"
  # The newline separated pattern list is handed over with -e so a pattern
  # starting with a dash is not mistaken for an option; "--" does the same
  # for file names.
  GREP_OPTIONS= LC_ALL=C grep -d "${action}" -nwHEI -e "${patterns}" -- "${files[@]}"
}

# Process the given status ($1) and output variables ($2).
# Takes into account allowed patterns, and if a bad match is found,
# prints the offending lines to stderr and returns 1.
# Arguments: $1 - exit status of the grep that produced the output
#            $2 - the matching lines reported by that grep
# Returns:   0 when nothing prohibited remains, 1 otherwise; exits with $1
#            when the grep itself failed (status other than 0 or 1).
process_output() {
  local status="$1" output="$2" allowed
  allowed=$(load_allowed)
  case "$status" in
    0)
      if [ -z "${allowed}" ]; then
        printf '%s\n' "${output}" >&2
        return 1
      fi
      # Determine with a negative grep if the found matches are allowed.
      # -e keeps an allowed pattern that starts with a dash from being read
      # as an option.
      printf '%s\n' "${output}" \
        | GREP_OPTIONS= LC_ALL=C grep -Ev -e "${allowed}" >&2
      case $? in
        0) return 1 ;; # some matches are not covered by an allowed pattern
        1) return 0 ;; # every match is allowed
        *)
          # The filter itself failed (e.g. an invalid allowed expression).
          # Fail closed: report every match instead of letting it through.
          printf '%s\n' "${output}" >&2
          return 1
          ;;
      esac
      ;;
    1) return 0 ;;
    *) exit "$status" ;;
  esac
}

# Runs the scanning function named by $1 with the remaining arguments.
# Exits 0 when that function succeeds; otherwise prints an explanation with
# possible mitigations to stderr and exits 1. This function never returns.
scan_with_fn_or_die() {
  local scanner="$1"
  shift
  if $scanner "$@"; then
    exit 0
  fi
  # One argument per output line; the empty strings produce blank lines.
  printf '%s\n' \
    "" \
    "[ERROR] Matched one or more prohibited patterns" \
    "" \
    "Possible mitigations:" \
    "- Mark false positives as allowed using: git config --add secrets.allowed ..." \
    "- Mark false positives as allowed by adding regular expressions to .gitallowed at repository's root directory" \
    "- List your configured patterns: git config --get-all secrets.patterns" \
    "- List your configured allowed patterns: git config --get-all secrets.allowed" \
    "- List your configured allowed patterns in .gitallowed at repository's root directory" \
    "- Use --no-verify if this is a one-time false positive" \
    >&2
  exit 1
}

# commit-msg hook entry point.
# Arguments: $1 - path of the file holding the commit message to scan.
commit_msg_hook() {
  local message_file="$1"
  scan_with_fn_or_die "scan" "${message_file}"
}

# Scans all files that are about to be committed.
# Globals: SCAN_CACHED (written) - forces the scan to read the index.
# Exits 0 when nothing needs scanning; otherwise hands the file list over to
# scan_with_fn_or_die.
pre_commit_hook() {
  SCAN_CACHED=1
  # Default to the well-known hash of Git's empty tree so that the very
  # first commit of a repository is compared against "nothing".
  local files=() file rev="4b825dc642cb6eb9a060e54bf8d69288fbee4904"
  # Diff against HEAD if this is not the first commit in the repo.
  git rev-parse --verify HEAD >/dev/null 2>&1 && rev="HEAD"
  # Filter out deleted files using --diff-filter. -z makes git print the raw,
  # NUL terminated names; without it, names with unusual characters are
  # printed C-quoted and would no longer match the real file.
  while IFS= read -r -d '' file; do
    [ -n "$file" ] && files+=("$file")
  done < <(git diff-index -z --diff-filter 'ACMU' --name-only --cached "$rev" --)
  # Nothing was added or changed (e.g. a deletion-only commit): there is
  # nothing to scan, and an empty file list would scan the whole index.
  [ ${#files[@]} -eq 0 ] && exit 0
  scan_with_fn_or_die "scan" "${files[@]}"
}

# prepare-commit-msg hook entry point: checks whether a merge would bring in
# commits containing prohibited patterns.
# Arguments: $2 - commit message source, $3 - third hook argument.
# Only acts when $2 is "merge" and $3 is empty; returns 0 otherwise.
prepare_commit_msg_hook() {
  [ "$2,$3" == "merge," ] || return 0
  local head_entry merged current_ref target
  # Environment entry of the form GITHEAD_<sha>=<name>,
  # e.g. GITHEAD_<sha>=release/1.43
  head_entry=$(env | grep GITHEAD)
  # Keep what follows the last "=": the name of what is being merged.
  merged="${head_entry##*=}"
  # e.g. refs/heads/master
  current_ref=$(git symbolic-ref HEAD)
  # Cut out "refs/heads/"
  target="${current_ref#refs/heads/}"
  # Feed the patches of all incoming commits to the scanner through stdin.
  git log "${target}".."${merged}" -p | scan_with_fn_or_die "scan" -
}

# Writes a single hook script that forwards to "git secrets --<cmd>".
# Arguments: $1 - git directory (the one containing "hooks")
#            $2 - hook name, e.g. pre-commit
#            $3 - internal git-secrets command the hook should run
# Globals:   FORCE (read) - 1 allows overwriting an existing hook.
install_hook() {
  local path="$1" hook="$2" cmd="$3"
  # Install next to other scripts when a "<hook>.d" directory exists,
  # otherwise become the hook itself.
  local dest="${path}/hooks/${hook}"
  [ -d "${dest}.d" ] && dest="${dest}.d/git-secrets"
  if [ -f "${dest}" ] && [ "${FORCE}" -ne 1 ]; then
    die "${dest} already exists. Use -f to force"
  fi
  printf '%s\n' "#!/usr/bin/env bash" > "${dest}"
  printf '%s\n' "git secrets --${cmd} -- \"\$@\"" >> "${dest}"
  chmod +x "${dest}"
  say "$(tput setaf 2)✓$(tput sgr 0) Installed ${hook} hook to ${dest}"
}

# Installs the three git-secrets hooks into the git directory given as $1.
install_all_hooks() {
  local hook
  for hook in commit-msg pre-commit prepare-commit-msg; do
    # The internal command is the hook name with dashes turned into
    # underscores plus a "_hook" suffix, e.g. pre-commit -> pre_commit_hook.
    install_hook "$1" "${hook}" "${hook//-/_}_hook"
  done
}

# Adds a git config pattern, ensuring to de-dupe.
# Arguments: $1 - config key (e.g. secrets.patterns); remaining arguments are
#            joined with spaces to form the value.
# Globals:   LITERAL (read) - 1 escapes regex metacharacters in the value.
#            GLOBAL (read)  - 1 operates on the --global git config.
# Returns:   1 when the exact value is already configured, otherwise the
#            status of "git config --add".
add_config() {
  local key="$1"; shift
  local value="$*"
  local scope=()
  if [ "${LITERAL}" -eq 1 ]; then
    # Escape every regex metacharacter, including "[" and "]", so the value
    # only ever matches itself.
    value=$(sed 's/[][\.|$(){}?+*^]/\\&/g' <<< "${value}")
  fi
  [ "${GLOBAL}" -eq 1 ] && scope=(--global)
  # De-dupe on whole lines (-x): a substring match would refuse a new value
  # that merely happens to be contained in an existing one. -e protects
  # values that begin with a dash.
  git config "${scope[@]}" --get-all "${key}" \
    | GREP_OPTIONS= grep -Fxq -e "${value}" && return 1
  git config "${scope[@]}" --add "${key}" "${value}"
}

# Registers the AWS credential provider, the common AWS patterns and the
# documented example keys (as allowed values), then exits.
# Prints "OK" and exits 0 when every entry was added or already present;
# exits with the failing status when an entry could not be written.
register_aws() {
  # Reusable regex patterns
  local aws="(AWS|aws|Aws)?_?" quote="(\"|')" connect="\s*(:|=>|=)\s*"
  local opt_quote="${quote}?"
  # Alternating key / value pairs handed to add_config in this order.
  local settings=(
    'secrets.providers' 'git secrets --aws-provider'
    'secrets.patterns' '(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'
    'secrets.patterns' "${opt_quote}${aws}(SECRET|secret|Secret)?_?(ACCESS|access|Access)?_?(KEY|key|Key)${opt_quote}${connect}${opt_quote}[A-Za-z0-9/\+=]{40}${opt_quote}"
    'secrets.patterns' "${opt_quote}${aws}(ACCOUNT|account|Account)_?(ID|id|Id)?${opt_quote}${connect}${opt_quote}[0-9]{4}\-?[0-9]{4}\-?[0-9]{4}${opt_quote}"
    'secrets.allowed' 'AKIAIOSFODNN7EXAMPLE'
    'secrets.allowed' "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
  )
  local i rc failure=0
  for ((i = 0; i < ${#settings[@]}; i += 2)); do
    add_config "${settings[i]}" "${settings[i + 1]}"
    rc=$?
    # add_config returns 1 for a value that is already configured, which is
    # fine when registering twice. Anything above that is a real failure and
    # must not be hidden by the calls that follow.
    if [ "${rc}" -gt 1 ]; then
      failure="${rc}"
    fi
  done

  if [ "${failure}" -eq 0 ]; then
    echo 'OK'
  fi

  exit "${failure}"
}

# Secret provider: prints the access key ids and secret access keys found in
# an ini style credentials file, escaped so they can be used as patterns.
# Arguments: $1 - credentials file (defaults to ~/.aws/credentials).
# Outputs:   one escaped key per line on stdout; nothing if the file is
#            missing.
aws_provider() {
  local file="$1"
  [ -z "${file}" ] && file=~/.aws/credentials
  # The path is quoted everywhere so locations containing spaces work.
  if [ -f "${file}" ]; then
    # Print everything after the first "=" so values that contain "="
    # themselves are not cut short; lines without "=" are ignored. Spaces
    # and double quotes are stripped, empty values are dropped (an empty
    # pattern would match everything) and special characters are escaped.
    awk '/aws_access_key_id|aws_secret_access_key/ {
           eq = index($0, "=")
           if (eq > 0) print substr($0, eq + 1)
         }' "${file}" \
      | tr -d ' "' \
      | sed -e '/^$/d' -e 's/[]\.|$(){}?+*^]/\\&/g'
  fi
}

# Dies unless the current command is the one an option belongs to.
# Arguments: $1 - the only command the option is valid for
#            $2 - option name(s) to show in the error message
# Globals:   COMMAND (read)
assert_option_for_command() {
  [ "${COMMAND}" == "$1" ] \
    || die "$2 can only be supplied with the $1 subcommand"
}

# Global state filled in from the command line: the sub-command itself and
# one 0/1 flag per supported option.
COMMAND="$1"
FORCE=0 RECURSIVE=0 LITERAL=0 GLOBAL=0 ALLOWED=0
SCAN_CACHED=0 SCAN_NO_INDEX=0 SCAN_UNTRACKED=0

# Drop the command name, then consume options until "--" is reached. What is
# left in "$@" afterwards are the positional arguments of the command.
shift
while (( $# != 0 )); do
  case "$1" in
    -f) assert_option_for_command "--install" "-f|--force"; FORCE=1 ;;
    -r) assert_option_for_command "--scan" "-r|--recursive"; RECURSIVE=1 ;;
    -a) assert_option_for_command "--add" "-a|--allowed"; ALLOWED=1 ;;
    -l) assert_option_for_command "--add" "-l|--literal"; LITERAL=1 ;;
    --cached) assert_option_for_command "--scan" "--cached"; SCAN_CACHED=1 ;;
    --no-index) assert_option_for_command "--scan" "--no-index"; SCAN_NO_INDEX=1 ;;
    --untracked) assert_option_for_command "--scan" "--untracked"; SCAN_UNTRACKED=1 ;;
    --global) GLOBAL=1 ;;
    --) shift; break ;;
  esac
  shift
done

# -r selects the plain grep code path, which cannot be combined with any of
# the options that are only understood by git grep.
if (( RECURSIVE == 1 )) \
    && (( SCAN_CACHED == 1 || SCAN_NO_INDEX == 1 || SCAN_UNTRACKED == 1 )); then
  die "-r|--recursive cannot be supplied with --cached, --no-index, or --untracked"
fi

# Dispatch on the sub-command. "$@" holds the positional arguments that were
# left over after option parsing.
case "${COMMAND}" in
  -h|--help|--) "$0" -h; exit 0 ;;
  --add-provider) add_config "secrets.providers" "$@" ;;
  --register-aws) register_aws ;;
  --aws-provider) aws_provider "$1" ;;
  --commit_msg_hook|--pre_commit_hook|--prepare_commit_msg_hook)
    # Strip the leading "--" to obtain the name of the hook function.
    ${COMMAND:2} "$@"
    ;;
  --add)
    if [ "${ALLOWED}" -eq 1 ]; then
      add_config "secrets.allowed" "$1"
    else
      add_config "secrets.patterns" "$1"
    fi
    ;;
  --scan) scan_with_fn_or_die "scan" "$@" ;;
  --scan-history) scan_with_fn_or_die "scan_history" "$@" ;;
  --list)
    # The expression is quoted so the shell cannot expand it against files
    # of the current directory (e.g. "secrets.yml") before git sees it.
    if [ "${GLOBAL}" -eq 1 ]; then
      git config --global --get-regex 'secrets.*'
    else
      git config --get-regex 'secrets.*'
    fi
    ;;
  --install)
    DIRECTORY="$1"
    if [ -z "${DIRECTORY}" ]; then
      # No target given: install into the current repository.
      DIRECTORY=$(git rev-parse --git-dir) || die "Not in a Git repository"
    elif [ -d "${DIRECTORY}"/.git ]; then
      # A work tree was given: install into its .git directory. Any other
      # directory is used as is.
      DIRECTORY="${DIRECTORY}/.git"
    fi
    mkdir -p "${DIRECTORY}/hooks" || die "Could not create dir: ${DIRECTORY}"
    install_all_hooks "${DIRECTORY}"
    ;;
  *) echo "Unknown option: ${COMMAND}" && "$0" -h ;;
esac
