#!/usr/bin/python3
"""This script generates cmake dependencies for the autogenerated lttng files.

This script generates 2 results:
1. CMake targets to generate the required headers for trace files
   (--targets_output_path).
   This should be included in CMakeLists.txt in "project_path"
2. CMake definitions for file attributes that define a unique hash for each
   compilation unit that depends on the traces. This allows us to generate
   unique events for each compilation unit.
   This should be included in CMakeLists.txt of each target that compiles these
   files.

Assumptions:
1. All code files in the project that might depend on generated traces must end
   with an extension in CODE_FILE_EXTENSIONS.
2. All code files in the project are built with the same include path, or, at
   least don't refer to different files in the same #include.
   This means that if 2 files call #include "a.h" they can't be compiled with
   two different include paths that make them refer to two different files.
3. The resulting cmake rules will be included in CMakeLists.txt in
   "project_path".
"""

import argparse
import collections
import functools
import hashlib
import os
import pathlib
import re
import subprocess
from typing import DefaultDict, Dict, Iterable, List, Optional, Set, Tuple

DIR_PATH = pathlib.Path(__file__).parent
GENERATOR_BINARY = "generate_lttng_for_files"
GENERATOR_PATH = DIR_PATH / GENERATOR_BINARY

COMBINED_COMMAND_PATH = DIR_PATH / "generate_combined_header.py"
COMBINED_GENERATED_FILE = "generated_lttng"

C_FILE_EXTENSIONS = (".c", ".cxx", ".cpp")
CODE_FILE_EXTENSIONS = C_FILE_EXTENSIONS + (".h", ".hpp")

CMAKE_CURRENT_SOURCE_DIR = "${CMAKE_CURRENT_SOURCE_DIR}"
CMAKE_BINARY_DIR = "${CMAKE_BINARY_DIR}"


class CodeFile:
  """A code file together with the files it directly #includes.

  Attributes:
    path: Path of the code file.
    direct_includes: The include targets that appear in the file's own
      #include directives.
  """

  path: str
  direct_includes: Set[str]

  def __init__(self, path: str, direct_includes: Set[str]):
    self.direct_includes = direct_includes
    self.path = path

  def __repr__(self) -> str:
    # Rendered as "{ <path> : <direct_includes> }".
    return f"{{ {self.path} : {self.direct_includes} }}"


class CmakeRulesGenerator:

  _provider_include_base_path: str
  _traces_output_dir: str
  _unique_prefix: str
  _include_path: str
  _filter_out: List[str]
  _generate_command: str
  _combine_command: str

  def __init__(
      self,
      project_path: str,
      provider_include_base_path: str,
      traces_output_dir: str,
      unique_prefix: str,
      filter_out: List[str],
      include_path: str,
  ):
    """Stores the settings used while generating the rules.

    Args:
      project_path: Accepted but neither stored nor used by this constructor.
      provider_include_base_path: Directory prefix that provider headers are
        included with, e.g. "auto" for #include "auto/provider1.h".
      traces_output_dir: Directory the generated rules write trace headers to.
      unique_prefix: Prefix prepended to the names of the generated targets.
      filter_out: Substrings; a scanned file whose path contains any of them
        is skipped.
      include_path: When truthy, forwarded to the generator command as
        --include_path.
    """
    # Helper programs referenced from the generated CMake commands.
    self._combine_command = COMBINED_COMMAND_PATH
    self._generate_command = GENERATOR_PATH

    self._filter_out = filter_out
    self._include_path = include_path
    self._unique_prefix = unique_prefix
    self._traces_output_dir = traces_output_dir
    self._provider_include_base_path = provider_include_base_path

  def _filter_code_files(self, files: Iterable[str]) -> List[str]:
    return [file for file in files if file.endswith(CODE_FILE_EXTENSIONS)]

  def _get_direct_includes(self, file_path: str) -> Set[str]:
    includes = set()
    with open(file_path, "r") as f:
      for line in f:
        m = re.match(r'#include ["\<](.*)["\>]', line)
        if not m:
          continue

        include = m.group(1)
        if include == os.path.join(
            self._provider_include_base_path, COMBINED_GENERATED_FILE + ".h"
        ):
          # Ignore the combined generated file that we create
          continue

        if not include.endswith(CODE_FILE_EXTENSIONS):
          # Ignore includes that don't end with code extensions.
          # They might be system includes. for example <string>
          continue

        includes.add(include)

    return includes

  def _get_code_files_dict(
      self, code_file_paths: Iterable[str]
  ) -> DefaultDict[str, List[CodeFile]]:
    """Groups the given code files by their base name.

    Several files can share a base name while living in different
    directories (#include <b/a.h> and #include <c/a.h> are different files),
    so every base name maps to a list of CodeFiles.
    """
    files_by_name = collections.defaultdict(list)  # missing name -> []
    for path in code_file_paths:
      entries = files_by_name[os.path.basename(path)]
      entries.append(CodeFile(path, self._get_direct_includes(path)))
    return files_by_name

  def _get_provider_name(self, file: str) -> str:
    """Returns the provider name from the include file path.

    This is done by removing the include base path and the following .h

    So, for example, provider1 can be included in the code by:
    #include "auto/provider1.h"

    In this case, provider_include_base_path is "auto" and this function
    will return "provider1"

    If the file doesn't match the include base path, this returns None
    """
    match = re.match("{}/(.*).h".format(self._provider_include_base_path), file)
    if not match:
      return None

    provider = match.group(1)
    if provider == COMBINED_GENERATED_FILE:
      # Ignore the combined generated file that we create
      return None

    return provider

  def _is_provider_file(self, file: str) -> bool:
    return self._get_provider_name(file) is not None

  def _get_code_file(
      self, file: str, code_files_dict: DefaultDict[str, List[CodeFile]]
  ) -> CodeFile:
    file_name = os.path.basename(file)
    code_files = code_files_dict[file_name]
    for code_file in code_files:
      if code_file.path == file:
        return code_file

    return None

  def _get_header_code_file(
      self, header: str, code_files_dict: DefaultDict[str, List[CodeFile]]
  ) -> CodeFile:
    file_name = os.path.basename(header)
    code_files = code_files_dict[file_name]
    for code_file in code_files:
      if code_file.path.endswith(header):
        return code_file

    return None

  def _get_file_provider_dependencies(
      self,
      file: str,
      code_files_dict: DefaultDict[str, List[CodeFile]],
      files_provider_dependencies: Dict[str, Optional[Set[str]]],
      additional_dependencies: DefaultDict[str, Set[str]],
      parent_file: Optional[str],
  ) -> Set[str]:
    """This function finds all the auto-generated providers this file depends on.

    This includes any providers that are directly included, or through other
    file includes.

    This is done recursively, by looking at all the direct includes, and seeing
    if any of them are includes of an auto-generated provider. Any other
    includes are handled by recursively calling this function again for the
    other include and adding the results.

    If file1.c has the following includes:
    #include "auto/provider1.h"
    #include "file2.h"

    Then the result is:
    _get_file_provider_dependencies("file1.c") =
        [ provider1 ] + _get_file_provider_dependencies("file2.h")

    To avoid infinite loops, when we first see a file we add it as None to
    the result dictionary. If we encounter it again we return as if it has
    no dependencies but add to additional_dependencies that whoever called
    it depends on it as well.

    Once we finish generating all the initial dependencies, we go over the
    additional_dependencies and also add them to the result dictionary.

    Args:
      file: Path of the file to analyze; looked up by exact path in
        code_files_dict.
      code_files_dict: Code files of the project, keyed by base name.
      files_provider_dependencies: Memo of results, updated in place. A value
        of None marks a file whose analysis is still in progress.
      additional_dependencies: Updated in place. Maps a file to the
        in-progress files it included, whose providers have to be added to
        it once the initial pass is complete.
      parent_file: The file whose include led to this call, or None for a
        top-level call.

    Returns:
      The provider names found for `file`. For a file that is still in
      progress (include cycle) or that can't be found, an empty set.

    NOTE(review): only the file that closes an include cycle is recorded in
    additional_dependencies. Files visited between it and the in-progress
    file memoize a set that was computed from the empty cycle result, so
    their stored sets may miss providers reachable only through the cycle.
    """

    if file in files_provider_dependencies:
      deps = files_provider_dependencies[file]
      if deps is not None:
        # Already fully analyzed: reuse the memoized result.
        return deps

      # There is a circular dependency. In order to solve that we return that
      # the dependencies for the current file are empty, but we save it in
      # additional dependencies. Then once the initial pass is done we will
      # add all the additional dependencies.
      assert (
          parent_file
      ), "We have a circular dependency, but no parent file. Impossible..."

      # A file that includes itself adds nothing; don't record it.
      if file != parent_file:
        additional_dependencies[parent_file].add(file)
      return set()
    else:
      # This is the first time we search for this file
      files_provider_dependencies[file] = None

    code_file = self._get_code_file(file, code_files_dict)
    if not code_file:
      print(
          "WARNING! {} included {}, but it doesn't seem to exist.".format(
              parent_file, file
          )
      )
      # Remove the in-progress marker so the unknown file leaves no entry.
      del files_provider_dependencies[file]
      return set()

    dependencies = set()
    for include in code_file.direct_includes:
      if self._is_provider_file(include):
        dependencies.add(self._get_provider_name(include))

      # Provider includes are not skipped here: they go through the same
      # project-file lookup as every other include.
      include_code_file = self._get_header_code_file(include, code_files_dict)
      if not include_code_file:
        # This include doesn't have a code file - It is an external header
        # include
        continue
      dependencies.update(
          self._get_file_provider_dependencies(
              include_code_file.path,
              code_files_dict,
              files_provider_dependencies,
              additional_dependencies,
              parent_file=file,
          )
      )

    # Replace the in-progress marker with the final result.
    files_provider_dependencies[file] = dependencies
    return dependencies

  def _add_file_additional_dependencies(
      self,
      file: str,
      files_provider_dependencies: Dict[str, Set[str]],
      additional_dependencies: DefaultDict[str, Set[str]],
  ) -> None:
    file_additional_dependencies = additional_dependencies[file]
    for deps in file_additional_dependencies:
      self._add_file_additional_dependencies(
          deps, files_provider_dependencies, additional_dependencies
      )
      files_provider_dependencies[file].update(
          files_provider_dependencies[deps]
      )

  def _add_additional_dependencies(
      self,
      files_provider_dependencies: Dict[str, Set[str]],
      additional_dependencies: DefaultDict[str, Set[str]],
  ) -> None:
    for file in list(additional_dependencies.keys()):
      self._add_file_additional_dependencies(
          file, files_provider_dependencies, additional_dependencies
      )

  def _get_files_provider_dependencies(
      self,
      code_file_paths: List[str],
      code_files_dict: DefaultDict[str, List[CodeFile]],
  ) -> Dict[str, Set[str]]:
    """Returns a dictionary of file paths to a list of provider header

    files this file depends on.
    """

    files_provider_dependencies = {}
    additional_dependencies = collections.defaultdict(set)
    for file in code_file_paths:
      self._get_file_provider_dependencies(
          file,
          code_files_dict,
          files_provider_dependencies,
          additional_dependencies,
          parent_file=None,
      )

    self._add_additional_dependencies(
        files_provider_dependencies, additional_dependencies
    )
    return files_provider_dependencies

  def _get_provider_file_dependencies(
      self, files_provider_dependencies: Dict[str, Set[str]]
  ) -> DefaultDict[str, Set[str]]:
    provider_file_dependencies = collections.defaultdict(set)
    for file, providers in files_provider_dependencies.items():
      for provider in providers:
        provider_file_dependencies[provider].add(file)

    return provider_file_dependencies

  def _generate_cmake_targets_for_provider(
      self, provider: str, dependencies: Iterable[str]
  ) -> str:
    """Returns the CMake rules that generate the header of one provider.

    Two rules are produced: an add_custom_command that runs the generator to
    create "<traces_output_dir>/<provider>.h", and an add_custom_target that
    depends on that header.

    Args:
      provider: Name of the provider.
      dependencies: Paths of the files that depend on the provider. All of
        them become DEPENDS of the command; only those with a C file
        extension are passed to the generator.
    """
    c_files = " ".join(
        os.path.abspath(path)
        for path in dependencies
        if path.endswith(C_FILE_EXTENSIONS)
    )
    deps = " ".join(map(os.path.abspath, dependencies))
    include_path_option = (
        "--include_path {}".format(self._include_path)
        if self._include_path
        else ""
    )

    output_dir = self._traces_output_dir
    header = f"{output_dir}/{provider}.h"
    generator_invocation = (
        f"{self._generate_command} --output_dir {output_dir}"
        f" --compile_commands_dir {CMAKE_BINARY_DIR}"
        f" {include_path_option} --provider {provider} {c_files}"
    )

    command_rule = (
        f"add_custom_command(OUTPUT {header}\n"
        f'\tCOMMAND bash -c "{generator_invocation}"\n'
        f"\tWORKING_DIRECTORY {CMAKE_CURRENT_SOURCE_DIR}\n"
        f"\tDEPENDS {GENERATOR_BINARY} {deps}\n"
        "\tCOMMENT Generating header file for lttng trace provider:"
        f" {provider}\n"
        ")\n\n"
    )
    target_rule = (
        f"add_custom_target({self._unique_prefix}{provider}_target\n"
        f"\tDEPENDS {header}\n"
        ")\n\n"
    )
    return command_rule + target_rule

  def _generate_cmake_targets(
      self, provider_file_dependencies: DefaultDict[str, Set[str]]
  ) -> str:
    out = 'execute_process(COMMAND bash -c "mkdir -p {}")\n\n'.format(
        self._traces_output_dir
    )

    # Sort providers and dependencies for a predictable output
    providers = list(provider_file_dependencies.keys())
    providers.sort()
    for provider in providers:
      dependencies = list(provider_file_dependencies[provider])
      dependencies.sort()
      out += self._generate_cmake_targets_for_provider(provider, dependencies)

    # Generate combined headers with all providers
    providers = provider_file_dependencies.keys()
    combined_providers = " ".join([
        "{}/{}.h".format(self._provider_include_base_path, prov)
        for prov in providers
    ])
    target_deps = " ".join(
        ["{}{}_target".format(self._unique_prefix, prov) for prov in providers]
    )

    out += (
        "add_custom_command(OUTPUT"
        " {traces_output_dir}/{combined_file}.h\n\tWORKING_DIRECTORY"
        " {current_source_dir}\n\tDEPENDS {target_deps}\n\tCOMMAND"
        " {combine_command} --output {traces_output_dir}/{combined_file}.h"
        " {combined_providers}\n)\n\n".format(
            combined_providers=combined_providers,
            traces_output_dir=self._traces_output_dir,
            target_deps=target_deps,
            combined_file=COMBINED_GENERATED_FILE,
            combine_command=self._combine_command,
            current_source_dir=CMAKE_CURRENT_SOURCE_DIR,
        )
    )

    out += (
        "add_custom_target({unique_prefix}generate_lttng_trace_headers\n"
        "\tDEPENDS {traces_output_dir}/{combined_file}.h\n)\n\n".format(
            traces_output_dir=self._traces_output_dir,
            combined_file=COMBINED_GENERATED_FILE,
            unique_prefix=self._unique_prefix,
        )
    )

    return out

  @functools.lru_cache(maxsize=1024)
  def _is_cmake_dir(self, dir_path: str) -> bool:
    return os.path.exists(os.path.join(dir_path, "CMakeLists.txt"))

  def _get_file_hash(self, file: str) -> str:
    hash_len = 6
    return hashlib.md5(file.encode()).hexdigest()[:hash_len]

  def _generate_file_properties(self, dependant_files: Iterable[str]) -> str:
    out = ""
    for file in dependant_files:
      if not file.endswith(C_FILE_EXTENSIONS):
        continue
      file_path = os.path.abspath(file)

      out += (
          "set_source_files_properties({file_path} \n\tPROPERTIES"
          " COMPILE_DEFINITIONS "
          '"__COMPILATION_UNIT_HASH__={file_hash}"\n)\n\n'.format(
              file_path=file_path,
              file_hash=self._get_file_hash(file),
          )
      )

    return out

  def _should_exclude_file(self, file: str) -> bool:
    return any(f in file for f in self._filter_out)

  def generate_cmake_rules(self) -> Tuple[str, str]:
    all_files = subprocess.check_output(
        ["find", "-type", "f"], encoding="utf-8", universal_newlines=True
    ).splitlines()

    filtered_files = [
        file for file in all_files if not self._should_exclude_file(file)
    ]

    code_file_paths = self._filter_code_files(filtered_files)

    code_files_dict = self._get_code_files_dict(code_file_paths)

    files_provider_dependencies = self._get_files_provider_dependencies(
        code_file_paths, code_files_dict
    )
    provider_file_dependencies = self._get_provider_file_dependencies(
        files_provider_dependencies
    )

    if not provider_file_dependencies:
      # Empty dependencies. No need to do anything
      return "", ""

    cmake_targets = self._generate_cmake_targets(provider_file_dependencies)

    dependant_files = [
        f for f in files_provider_dependencies if files_provider_dependencies[f]
    ]
    # Sort order for predictable output
    dependant_files.sort()

    file_properties = self._generate_file_properties(dependant_files)
    return cmake_targets, file_properties


def parse_args() -> Tuple[
    str, str, str, str, List[str], Optional[str], str, Optional[str]
]:
  """Parses and normalizes the command line arguments.

  Returns:
    A tuple of:
      project_path: Absolute path of the project.
      provider_include_base_path: As given, without trailing slashes.
      traces_output_dir: Absolute path of the traces output directory.
      unique_prefix: The given prefix followed by "_", or "" if not given.
      filter_out: The given patterns, plus a pattern for the traces output
        directory when it lies inside the project.
      include_path: As given, or None.
      targets_output_path: Absolute path of the targets output file.
      file_properties_output_path: Absolute path of the file properties
        output file, or None if not given.
  """
  parser = argparse.ArgumentParser(
      prog="generate_cmake_rules",
      description=(
          "This script generates cmake rules for LTTNG trace generation"
      ),
  )
  parser.add_argument(
      "--project_path",
      help="The path of the project we generate traces in",
      required=True,
  )
  parser.add_argument(
      "--provider_include_base_path",
      help=(
          "When a provider is included in the project, the include should look"
          ' like: #include "<provider_include_base_path>/provider.h"'
      ),
      required=True,
  )
  parser.add_argument(
      "--traces_output_dir",
      help="Output directory for generated traces",
      required=True,
  )
  parser.add_argument(
      "--unique_prefix",
      help="Unique prefix to add to targets",
      required=False,
      default=None,
  )
  parser.add_argument(
      "--filter_out",
      help="Directory patterns to filter out",
      required=False,
      default=None,
      nargs="*",
  )
  parser.add_argument(
      "--include_path",
      help=(
          "Include path to generate headers relative to."
          " If not given, we use the include path given in compile_commands"
      ),
      required=False,
      default=None,
  )
  parser.add_argument(
      "--targets_output_path",
      help="Path for output file that contains CMake target definitions",
      required=True,
  )
  parser.add_argument(
      "--file_properties_output_path",
      help="Path for output file that contains file property hash definition",
      required=False,
  )
  args = parser.parse_args()

  project_path = os.path.abspath(args.project_path)
  provider_include_base_path = args.provider_include_base_path.rstrip("/")
  traces_output_relative_path = os.path.relpath(
      args.traces_output_dir, project_path
  )
  traces_output_dir = os.path.abspath(args.traces_output_dir)

  unique_prefix = args.unique_prefix + "_" if args.unique_prefix else ""

  filter_out = list(args.filter_out or [])
  # Always filter out the traces output dir. Patterns are matched as
  # substrings of "./"-prefixed paths, so the pattern is anchored as
  # "./<dir>/": a bare "gen" would also exclude "./src/generic.c". A
  # directory that is the project root or lies outside the project gets no
  # pattern: "." would match every file and "../x" matches none.
  traces_dir_in_project = (
      traces_output_relative_path not in (os.curdir, os.pardir)
      and not traces_output_relative_path.startswith(os.pardir + os.sep)
  )
  if traces_dir_in_project:
    filter_out.append(
        os.path.join(os.curdir, traces_output_relative_path, "")
    )

  # Both output paths are made absolute here because the caller changes the
  # working directory to the project before writing them.
  targets_output_path = os.path.abspath(args.targets_output_path)
  file_properties_output_path = args.file_properties_output_path
  if file_properties_output_path:
    file_properties_output_path = os.path.abspath(file_properties_output_path)

  return (
      project_path,
      provider_include_base_path,
      traces_output_dir,
      unique_prefix,
      filter_out,
      args.include_path,
      targets_output_path,
      file_properties_output_path,
  )


def write_output_if_changed(targets_output_path: str, result: str) -> None:
  """Writes `result` to the file unless it already holds exactly that.

  An existing file with identical content is left untouched; a missing file
  is created.
  """
  try:
    with open(targets_output_path, "r") as existing:
      up_to_date = existing.read() == result
  except FileNotFoundError:
    # No file yet, so it has to be written.
    up_to_date = False

  if up_to_date:
    return

  with open(targets_output_path, "w") as output:
    output.write(result)


def main() -> None:
  """Generates the CMake rules for the project given on the command line.

  The working directory is changed to the project path before the rules are
  generated. The targets output file is always written; the file properties
  output file only when its path was given. Files whose content wouldn't
  change are not rewritten.
  """
  parsed_args = parse_args()
  project_path = parsed_args[0]
  targets_output_path, file_properties_output_path = parsed_args[6:]

  os.chdir(project_path)
  # The first six parsed values are the generator's constructor arguments,
  # in order.
  generator = CmakeRulesGenerator(*parsed_args[:6])
  cmake_targets, file_properties = generator.generate_cmake_rules()

  write_output_if_changed(targets_output_path, cmake_targets)
  if file_properties_output_path:
    write_output_if_changed(file_properties_output_path, file_properties)

if __name__ == "__main__":
  main()
