------------------------------------------------------------------------
-- The Agda standard library
--
-- Examples of regular expressions and matching
------------------------------------------------------------------------

{-# OPTIONS --with-K #-}

module README.Text.Regex where

open import Data.Bool using (true; false)
open import Data.List.Base using (_∷_; [])
open import Data.String
open import Function.Base using () renaming (_$′_ to _$_)
open import Relation.Nullary.Decidable using (yes)
open import Relation.Nullary.Decidable using (True; False; from-yes)

-- Our library available via the Text.Regex module is safe but it works on
-- lists of characters.

-- To use it on strings we have to rely on unsafe theorems about the
-- conversions between strings and lists of characters being inverses.
-- For convenience we use the following unsafe module for this README.
open import Text.Regex.String.Unsafe

------------------------------------------------------------------------
-- Defining regular expressions

-- The type of regular expressions is Exp.

-- Some examples of regular expressions using:

-- [_]        for the union of ranges it contains
-- _─_        for a range
-- singleton  for an exact character
-- _∙_        for the concatenation of two regular expressions
-- _∣_        for the sum of two regular expressions
-- _⋆         for the Kleene star (zero or more matches of the regular expression)
-- _⁇         for an optional regular expression

ℕ* : Exp
ℕ* = [ '1'  '9'  [] ]   -- a non-zero digit
    [ '0'  '9'  [] ]  -- followed by zero or more digits

 : Exp
 = ℕ*  singleton '0' -- ℕ* or exactly 0

 : Exp
 = ((singleton '-')   ℕ*) -- an optional minus sign followed by a ℕ*
   singleton '0'            -- or exactly 0

------------------------------------------------------------------------
-- An expression's semantics

-- The semantics of these regular expression is defined in terms of the
-- lists of characters they match. The type (str ∈ e) states that the
-- string str matches the expression e.

-- It is decidable, and the proof is called _∈?_.
-- We can run it on a few examples to check that it matches our intuition:

-- Valid: starts with a non-zero digit, followed by 3 digits
_ : True ("1848" ∈? ℕ*)
_ = _

-- Valid: exactly 0
_ : True ("0" ∈? )
_ = _

-- Invalid: starts with a leading 0
_ : False ("007" ∈? )
_ = _

-- Invalid: no negative ℕ number
_ : False ("-666" ∈? )
_ = _

-- Valid: a negative integer
_ : True ("-666" ∈? )
_ = _

-- Invalid: no negative 0
_ : False ("-0" ∈? )
_ = _

------------------------------------------------------------------------
-- Matching algorithms

-- The proof that _∈_ is decidable gives us the ability to check whether
-- a whole string matches a regular expression. But we may want to use
-- other matching algorithms detecting a prefix, infix, or suffix of the
-- input string that matches the regular expression.

-- This is what the Regex type gives us.

-- For instance, the following value corresponds to finding an infix
-- substring matching the string "agda" or "agdai"

agda : Exp
agda = singleton 'a'
      singleton 'g'
      singleton 'd'
      singleton 'a'
      (singleton 'i' )

infixAgda : Regex
infixAgda = record
  { fromStart  = false
  ; tillEnd    = false
  ; expression = agda
  }

-- The search function gives us the ability to look for matches

-- Valid: agda in the middle
_ : True (search "Maria Magdalena" infixAgda)
_ = _

-- By changing the value of fromStart and tillEnd we can control where the
-- substring should be. We can insist on the match being at the end of the
-- input for instance:

suffixAgda : Regex
suffixAgda = record
  { fromStart  = false
  ; tillEnd    = true
  ; expression = agda
  }

-- Invalid: agda is in the middle
_ : False (search "Maria Magdalena" suffixAgda)
_ = _

-- Valid: agda as a suffix
_ : True (search "README.agda" suffixAgda)
_ = _

-- Valid: agdai as a suffix
_ : True (search "README.agdai" suffixAgda)
_ = _


------------------------------------------------------------------------
-- Advanced uses

-- Search does not just return a boolean, it returns an informative answer.
-- Infix matches are for instance represented using the `Infix` relation on
-- list. Such a proof pinpoints the exact position of the match:

open import Data.List.Relation.Binary.Infix.Heterogeneous
open import Data.List.Relation.Binary.Infix.Heterogeneous.Properties
open import Data.List.Relation.Binary.Pointwise using (≡⇒Pointwise-≡)
open import Relation.Binary.PropositionalEquality

-- Here is an example of a match: it gives back the substring, the inductive
-- proof that it is accepted by the regular expression and its precise location
-- inside the input string
mariamAGDAlena : Match "Maria Magdalena" infixAgda
mariamAGDAlena = record
  { string  = "agda"                     -- we have found "agda"
  ; match   = from-yes ("agda" ∈? agda)  -- a proof of the match
  ; related = proof                      -- and its location
  }

  where

    proof : Infix _≡_ (toList "agda") (toList "Maria Magdalena")
    proof = toList "Maria M"
        ++ⁱ fromPointwise (≡⇒Pointwise-≡ refl)
        ⁱ++ toList "lena"


-- And here is the proof that search returns such an object
_ : search "Maria Magdalena" infixAgda  yes mariamAGDAlena
_ = refl