// file      : butl/path -*- C++ -*-
// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#ifndef BUTL_PATH
#define BUTL_PATH

#include <string>
#include <cstddef>    // ptrdiff_t
#include <utility>    // move()
#include <iterator>
#include <exception>
#include <functional> // hash

namespace butl
{
  class invalid_path_base: std::exception
  {
  public:
    virtual char const*
    what () const throw ();
  };

  template <typename C>
  class invalid_basic_path: public invalid_path_base
  {
  public:
    typedef std::basic_string<C> string_type;

    invalid_basic_path (C const* p): path_ (p) {}
    invalid_basic_path (string_type const& p): path_ (p) {}
    ~invalid_basic_path () throw () {}

    string_type const&
    path () const
    {
      return path_;
    }

  private:
    string_type path_;
  };

  template <typename C>
  struct path_traits
  {
    typedef std::basic_string<C> string_type;
    typedef typename string_type::size_type size_type;

    // Canonical directory and path seperators.
    //
#ifdef _WIN32
    static C const directory_separator = '\\';
    static C const path_separator = ';';
#else
    static C const directory_separator = '/';
    static C const path_separator = ':';
#endif

    // Directory separator tests. On some platforms there
    // could be multiple seperators. For example, on Windows
    // we check for both '/' and '\'.
    //
    static bool
    is_separator (C c)
    {
#ifdef _WIN32
      return c == '\\' || c == '/';
#else
      return c == '/';
#endif
    }

    static size_type
    find_separator (string_type const& s, size_type pos = 0)
    {
      const C* r (find_separator (s.c_str () + pos, s.size () - pos));
      return r != nullptr ? r - s.c_str () : string_type::npos;
    }

    static const C*
    find_separator (const C* s, size_type n)
    {
      for (const C* e (s + n); s != e; ++s)
      {
        if (is_separator (*s))
          return s;
      }

      return nullptr;
    }

    static size_type
    rfind_separator (string_type const& s, size_type pos = string_type::npos)
    {
      if (pos == string_type::npos)
        pos = s.size ();
      else
        pos++;

      for (; pos > 0; --pos)
      {
        if (is_separator (s[pos - 1]))
          return pos - 1;
      }

      return string_type::npos;
    }

    // Return the position of '.' or npos if there is no extension.
    //
    static size_type
    find_extension (string_type const& s)
    {
      size_type i (s.size ());

      for (; i > 0; --i)
      {
        C c (s[i - 1]);

        if (c == '.')
          break;

        if (is_separator (c))
        {
          i = 0;
          break;
        }
      }

      // Weed out paths like ".txt" (and "/.txt") and "txt.".
      //
      if (i > 1 && !is_separator (s[i - 2]) && i != s.size ())
        return i - 1;
      else
        return string_type::npos;
    }

    static int
    compare (string_type const& l, string_type const& r)
    {
      return compare (l.c_str (), l.size (), r.c_str (), r.size ());
    }

    // @@ Currently for case-insensitive filesystems (Windows) compare()
    // works properly only for ASCII.
    //
    static int
    compare (const C* l, size_type ln, const C* r, size_t rn)
    {
      for (size_type i (0), n (ln < rn ? ln : rn); i != n; ++i)
      {
#ifdef _WIN32
        C lc (tolower (l[i])), rc (tolower (r[i]));
#else
        C lc (l[i]), rc (r[i]);
#endif
        if (is_separator (lc) && is_separator (rc))
          continue;

        if (lc < rc) return -1;
        if (lc > rc) return 1;
      }

      return ln < rn ? -1 : (ln > rn ? 1 : 0);
    }

    // Get/set current working directory. Throw std::system_error to report
    // the underlying OS errors.
    //
    static string_type
    current ();

    static void
    current (string_type const&);

    // Return the user home directory. Throw std::system_error to report the
    // underlying OS errors.
    //
    static string_type
    home ();

    // Return the temporary directory. Throw std::system_error to report the
    // underlying OS errors.
    //
    static string_type
    temp_directory ();

    // Return a temporary name. The name is constructed by starting with the
    // prefix followed by the process id following by a unique counter value
    // inside the process. Throw std::system_error to report the underlying OS
    // errors.
    //
    static string_type
    temp_name (string_type const& prefix);

    // Make the path real (by calling realpath(3)). Throw invalid_basic_path
    // if the path is invalid (e.g., some components do not exist) and
    // std::system_error to report other underlying OS errors.
    //
#ifndef _WIN32
    static void
    realize (string_type&);
#endif

  private:
#ifdef _WIN32
    static C
    tolower (C);
#endif
  };

  template <typename C>
  class invalid_basic_path;

  template <typename C, typename K>
  class basic_path;

  // Cast from one path kind to another without any checking or
  // processing.
  //
  template <class P, class C, class K> P path_cast (const basic_path<C, K>&);
  template <class P, class C, class K> P path_cast (basic_path<C, K>&&);

  template <typename C>
  class path_data;

  template <typename C>
  struct dir_path_kind;

  template <typename C>
  struct any_path_kind
  {
    typedef path_data<C> base_type;
    typedef basic_path<C, dir_path_kind<C>> dir_type;
  };

  template <typename C>
  struct dir_path_kind
  {
    typedef basic_path<C, any_path_kind<C>> base_type;
    typedef basic_path<C, dir_path_kind<C>> dir_type;
  };

  typedef basic_path<char, any_path_kind<char>> path;
  typedef basic_path<char, dir_path_kind<char>> dir_path;
  typedef invalid_basic_path<char> invalid_path;

  typedef basic_path<wchar_t, any_path_kind<wchar_t>> wpath;
  typedef basic_path<wchar_t, dir_path_kind<wchar_t>> dir_wpath;
  typedef invalid_basic_path<wchar_t> invalid_wpath;

  template <typename C>
  class path_data
  {
  public:
    typedef std::basic_string<C> string_type;

    path_data () = default;

    explicit
    path_data (string_type s): path_ (std::move (s)) {}

  protected:
    string_type path_;
  };

  template <typename C, typename K>
  class basic_path: public K::base_type
  {
  public:
    typedef std::basic_string<C> string_type;
    typedef typename string_type::size_type size_type;

    typedef typename K::base_type base_type;
    typedef typename K::dir_type dir_type;

    typedef path_traits<C> traits;

    struct iterator;
    typedef std::reverse_iterator<iterator> reverse_iterator;

    // Create a special empty path. Note that we have to provide our
    // own implementation rather than using '=default' to make clang
    // allow default-initialized const instances of this type.
    //
    basic_path () {};

    // Constructors that initialize a path from a string argument throw the
    // invalid_path exception if the string is not a valid path (e.g., uses
    // unsupported path notations on Windows).
    //
    explicit
    basic_path (C const* s): base_type (s) {init (this->path_);}

    basic_path (C const* s, size_type n)
        : base_type (string_type (s, n)) {init (this->path_);}

    explicit
    basic_path (string_type s): base_type (std::move (s)) {init (this->path_);}

    basic_path (const string_type& s, size_type n)
        : base_type (string_type (s, 0, n)) {init (this->path_);}

    basic_path (const string_type& s, size_type p, size_type n)
        : base_type (string_type (s, p, n)) {init (this->path_);}

    // Create a path using the exact string representation. If the string is
    // not a valid path or if it would require a modification, then empty path
    // is created instead and the passed string rvalue-reference is left
    // untouched. Note that no exception is thrown if the path is invalid. See
    // also string()&& below.
    //
    enum exact_type {exact};
    basic_path (string_type&& s, exact_type)
    {
      if (init (s, true))
        this->path_ = std::move (s);
    }

    // Create a path as a sub-path identified by the [begin, end)
    // range of components.
    //
    basic_path (const iterator& begin, const iterator& end);

    basic_path (const reverse_iterator& rbegin, const reverse_iterator& rend)
        : basic_path (rend.base (), rbegin.base ()) {}

    void
    swap (basic_path& p) {this->path_.swap (p.path_);}

    void
    clear () {this->path_.clear ();}

    // Get/set current working directory. Throw std::system_error
    // to report the underlying OS errors.
    //
    static dir_type
    current () {return dir_type (traits::current ());}

    static void
    current (basic_path const&);

    // Return the user home directory. Throw std::system_error to report the
    // underlying OS errors.
    //
    static dir_type
    home () {return dir_type (traits::home ());}

    // Return the temporary directory. Throw std::system_error to report the
    // underlying OS errors.
    //
    static dir_type
    temp_directory () {return dir_type (traits::temp_directory ());}

    // Return a temporary path. The path is constructed by starting with the
    // temporary directory and then appending a path component consisting of
    // the prefix followed by the process id following by a unique counter
    // value inside the process. Throw std::system_error to report the
    // underlying OS errors.
    //
    static basic_path
    temp_path (const string_type& prefix)
    {
      return temp_directory () / basic_path (traits::temp_name (prefix));
    }

  public:
    bool
    empty () const {return this->path_.empty ();}

    size_type
    size () const {return this->path_.size ();}

    // Return true if this path doesn't have any directories. Note
    // that "/foo" is not a simple path (it is "foo" in root directory)
    // while "/" is (it is the root directory).
    //
    bool
    simple () const;

    bool
    absolute () const;

    bool
    relative () const
    {
      return !absolute ();
    }

    bool
    root () const;

    // Return true if *this is a sub-path of the specified path (i.e.,
    // the specified path is a prefix). Expects both paths to be
    // normalized. Note that this function returns true if the paths
    // are equal. Empty path is considered a prefix of any path.
    //
    bool
    sub (const basic_path&) const;

    // Return true if *this is a super-path of the specified path (i.e.,
    // the specified path is a suffix). Expects both paths to be
    // normalized. Note that this function returns true if the paths
    // are equal. Empty path is considered a suffix of any path.
    //
    bool
    sup (const basic_path&) const;

  public:
    // Return the path without the directory part.
    //
    basic_path
    leaf () const;

    // Return the path without the specified directory part. Throws
    // invalid_path if the directory is not a prefix of *this. Expects
    // both paths to be normalized.
    //
    basic_path
    leaf (basic_path const&) const;

    // Return the directory part of the path or empty path if
    // there is no directory.
    //
    dir_type
    directory () const;

    // Return the directory part of the path without the specified
    // leaf part. Throws invalid_path if the leaf is not a suffix of
    // *this. Expects both paths to be normalized.
    //
    dir_type
    directory (basic_path const&) const;

    // Return the root directory of the path or empty path if
    // the directory is not absolute.
    //
    dir_type
    root_directory () const;

    // Return the path without the extension, if any.
    //
    basic_path
    base () const;

    // Return the extension or NULL if not present. If not NULL, then
    // the result points to the character past the dot but it is legal
    // to decrement it once to obtain the value with the dot.
    //
    const C*
    extension () const;

    // Return a path relative to the specified path that is equivalent
    // to *this. Throws invalid_path if a relative path cannot be derived
    // (e.g., paths are on different drives on Windows).
    //
    basic_path
    relative (basic_path) const;

    // Iteration over path components.
    //
  public:
    struct iterator
    {
      typedef string_type value_type;
      typedef string_type* pointer;
      typedef string_type reference;
      typedef std::ptrdiff_t difference_type;
      typedef std::bidirectional_iterator_tag iterator_category;

      typedef typename string_type::size_type size_type;

      iterator (): p_ (nullptr) {}
      iterator (const string_type& p, size_type b, size_type e)
          : p_ (&p), b_ (b), e_ (e) {}

      iterator&
      operator++ ()
      {
        b_ = e_;

        if (b_ != string_type::npos)
          e_ = traits::find_separator (*p_, ++b_);

        return *this;
      }

      iterator&
      operator-- ()
      {
        e_ = b_;

        b_ = e_ == string_type::npos // Last component?
          ? traits::rfind_separator (*p_)
          : (--e_ == 0 // First empty component?
             ? string_type::npos
             : traits::rfind_separator (*p_, e_ - 1));

        b_ = b_ == string_type::npos // First component?
          ? 0
          : b_ + 1;

        return *this;
      }

      iterator
      operator++ (int) {iterator r (*this); operator++ (); return r;}

      iterator
      operator-- (int) {iterator r (*this); operator-- (); return r;}

      string_type operator* () const
      {
        return string_type (*p_, b_, (e_ != string_type::npos ? e_ - b_ : e_));
      }

      pointer operator-> () const = delete;

      friend bool
      operator== (const iterator& x, const iterator& y)
      {
        return x.p_ == y.p_ && x.b_ == y.b_ && x.e_ == y.e_;
      }

      friend bool
      operator!= (const iterator& x, const iterator& y) {return !(x == y);}

    private:
      friend class basic_path;

      // b != npos && e == npos - last component
      // b == npos && e == npos - one past last component (end)
      //
      const string_type* p_;
      size_type b_;
      size_type e_;
    };

    iterator begin () const;
    iterator end () const;

    reverse_iterator rbegin () const {return reverse_iterator (end ());}
    reverse_iterator rend () const {return reverse_iterator (begin ());}

  public:
    // Normalize the path. This includes collapsing the '.' and '..'
    // directories if possible, collapsing multiple directory
    // separators, and converting all directory separators to the
    // canonical form. Return *this.
    //
    basic_path&
    normalize ();

    // Make the path absolute using the current directory unless
    // it is already absolute. Return *this.
    //
    basic_path&
    complete ();

    // Make the path real, that is, absolute, normalized, and with resolved
    // symlinks. On POSIX systems this is accomplished with the call to
    // realpath(3). On Windows -- complete() and normalize(). Return *this.
    //
    basic_path&
    realize ();

  public:
    basic_path&
    operator/= (basic_path const&);

    // Append a single path component (must not contain directory separators)
    // as a string, without first constructing the path object.
    //
    basic_path&
    operator/= (string_type const&);

    basic_path&
    operator/= (const C*);

    basic_path
    operator+ (string_type const& s) const
    {
      return basic_path (this->path_ + s);
    }

    basic_path
    operator+ (const C* s) const
    {
      return basic_path (this->path_ + s);
    }

    basic_path
    operator+ (C c) const
    {
      return basic_path (this->path_ + c);
    }

    basic_path&
    operator+= (string_type const& s)
    {
      this->path_ += s;
      return *this;
    }

    basic_path&
    operator+= (const C* s)
    {
      this->path_ += s;
      return *this;
    }

    basic_path&
    operator+= (C c)
    {
      this->path_ += c;
      return *this;
    }

    // Note that comparison is case-insensitive if the filesystem is
    // not case-sensitive (e.g., Windows).
    //
    template <typename K1>
    int
    compare (const basic_path<C, K1>& x) const {
      return traits::compare (this->path_, x.path_);}

  public:
    const string_type&
    string () const& {return this->path_;}

    // Moves the underlying path string out of the path object. The
    // path object becomes empty. Usage: std::move (p).string ().
    //
    string_type
    string () && {string_type r; r.swap (this->path_); return r;}

    // If possible, return a POSIX representation of the path. For example,
    // for a Windows path in the form foo\bar this function will return
    // foo/bar. If it is not possible to create a POSIX representation for
    // this path (e.g., c:\foo), this function will throw the invalid_path
    // exception.
    //
    string_type
    posix_string () const;

  protected:
    basic_path (string_type s, bool i): base_type (std::move (s))
    {
      if (i)
        init (this->path_);
    }

    // Common implementation for operator=/().
    //
    void
    combine (const C*, size_type);

  private:
    template <class P, class C1, class K1>
    friend P butl::path_cast (const basic_path<C1, K1>&);

    template <class P, class C1, class K1>
    friend P butl::path_cast (basic_path<C1, K1>&&);

    // If exact is true, return whether the initialization was successful,
    // that is, the passed string is a valid path and no modifications were
    // necessary. Otherwise (extact is false), throw invalid_path if the
    // string is not a valid path (e.g., uses an unsupported path notation on
    // Windows).
    //
    bool
    init (string_type& s, bool exact = false);
  };

  template <typename C, typename K>
  inline basic_path<C, K>
  operator/ (basic_path<C, K> const& x, basic_path<C, K> const& y)
  {
    basic_path<C, K> r (x);
    r /= y;
    return r;
  }

  template <typename C, typename K>
  inline basic_path<C, K>
  operator/ (basic_path<C, K> const& x, std::basic_string<C> const& y)
  {
    basic_path<C, K> r (x);
    r /= y;
    return r;
  }

  template <typename C, typename K>
  inline basic_path<C, K>
  operator/ (basic_path<C, K> const& x, const C* y)
  {
    basic_path<C, K> r (x);
    r /= y;
    return r;
  }

  template <typename C, typename K1, typename K2>
  inline bool
  operator== (const basic_path<C, K1>& x, const basic_path<C, K2>& y)
  {
    return x.compare (y) == 0;
  }

  template <typename C, typename K1, typename K2>
  inline bool
  operator!= (const basic_path<C, K1>& x, const basic_path<C, K2>& y)
  {
    return !(x == y);
  }

  template <typename C, typename K1, typename K2>
  inline bool
  operator< (const basic_path<C, K1>& x, const basic_path<C, K2>& y)
  {
    return x.compare (y) < 0;
  }

  // Additional operators for certain path kind combinations.
  //
  template <typename C>
  inline basic_path<C, any_path_kind<C>>
  operator/ (basic_path<C, dir_path_kind<C>> const& x,
             basic_path<C, any_path_kind<C>> const& y)
  {
    basic_path<C, any_path_kind<C>> r (x);
    r /= y;
    return r;
  }

  // For operator<< (ostream) see the path-io header.
}

namespace std
{
  template <typename C, typename K>
  struct hash<butl::basic_path<C, K>>: hash<basic_string<C>>
  {
    size_t
    operator() (const butl::basic_path<C, K>& p) const noexcept
    {
      return hash<basic_string<C>>::operator() (p.string ());
    }
  };
}

#include <butl/path.ixx>
#include <butl/path.txx>

#endif // BUTL_PATH