------------------------------------------------------------------------
-- The Agda standard library
--
-- Examples of regular expressions and matching
------------------------------------------------------------------------
{-# OPTIONS --with-K #-}
module README.Text.Regex where
open import Data.Bool using (true; false)
open import Data.List.Base using (_∷_; [])
open import Data.String
open import Function.Base using () renaming (_$′_ to _$_)
open import Relation.Nullary.Decidable using (yes)
open import Relation.Nullary.Decidable using (True; False; from-yes)
-- Our library available via the Text.Regex module is safe but it works on
-- lists of characters.
-- To use it on strings we have to rely on unsafe theorems about the
-- conversions between strings and lists of characters being inverses.
-- For convenience we use the following unsafe module for this README.
open import Text.Regex.String.Unsafe
------------------------------------------------------------------------
-- Defining regular expressions
-- The type of regular expressions is Exp.
-- Some examples of regular expressions using:
-- [_] for the union of ranges it contains
-- _─_ for a range
-- singleton for an exact character
-- _∙_ for the concatenation of two regular expressions
-- _∣_ for the sum of two regular expressions
-- _⋆ for the Kleene star (zero or more matches of the regular expression)
-- _⁇ for an optional regular expression
ℕ* : Exp
ℕ* = [ '1' ─ '9' ∷ [] ] -- a non-zero digit
∙ [ '0' ─ '9' ∷ [] ] ⋆ -- followed by zero or more digits
ℕ : Exp
ℕ = ℕ* ∣ singleton '0' -- ℕ* or exactly 0
ℤ : Exp
ℤ = ((singleton '-') ⁇ ∙ ℕ*) -- an optional minus sign followed by a ℕ*
∣ singleton '0' -- or exactly 0
------------------------------------------------------------------------
-- An expression's semantics
-- The semantics of these regular expression is defined in terms of the
-- lists of characters they match. The type (str ∈ e) states that the
-- string str matches the expression e.
-- It is decidable, and the proof is called _∈?_.
-- We can run it on a few examples to check that it matches our intuition:
-- Valid: starts with a non-zero digit, followed by 3 digits
_ : True ("1848" ∈? ℕ*)
_ = _
-- Valid: exactly 0
_ : True ("0" ∈? ℕ)
_ = _
-- Invalid: starts with a leading 0
_ : False ("007" ∈? ℕ)
_ = _
-- Invalid: no negative ℕ number
_ : False ("-666" ∈? ℕ)
_ = _
-- Valid: a negative integer
_ : True ("-666" ∈? ℤ)
_ = _
-- Invalid: no negative 0
_ : False ("-0" ∈? ℤ)
_ = _
------------------------------------------------------------------------
-- Matching algorithms
-- The proof that _∈_ is decidable gives us the ability to check whether
-- a whole string matches a regular expression. But we may want to use
-- other matching algorithms detecting a prefix, infix, or suffix of the
-- input string that matches the regular expression.
-- This is what the Regex type gives us.
-- For instance, the following value corresponds to finding an infix
-- substring matching the string "agda" or "agdai"
agda : Exp
agda = singleton 'a'
∙ singleton 'g'
∙ singleton 'd'
∙ singleton 'a'
∙ (singleton 'i' ⁇)
infixAgda : Regex
infixAgda = record
{ fromStart = false
; tillEnd = false
; expression = agda
}
-- The search function gives us the ability to look for matches
-- Valid: agda in the middle
_ : True (search "Maria Magdalena" infixAgda)
_ = _
-- By changing the value of fromStart and tillEnd we can control where the
-- substring should be. We can insist on the match being at the end of the
-- input for instance:
suffixAgda : Regex
suffixAgda = record
{ fromStart = false
; tillEnd = true
; expression = agda
}
-- Invalid: agda is in the middle
_ : False (search "Maria Magdalena" suffixAgda)
_ = _
-- Valid: agda as a suffix
_ : True (search "README.agda" suffixAgda)
_ = _
-- Valid: agdai as a suffix
_ : True (search "README.agdai" suffixAgda)
_ = _
------------------------------------------------------------------------
-- Advanced uses
-- Search does not just return a boolean, it returns an informative answer.
-- Infix matches are for instance represented using the `Infix` relation on
-- list. Such a proof pinpoints the exact position of the match:
open import Data.List.Relation.Binary.Infix.Heterogeneous
open import Data.List.Relation.Binary.Infix.Heterogeneous.Properties
open import Data.List.Relation.Binary.Pointwise using (≡⇒Pointwise-≡)
open import Relation.Binary.PropositionalEquality
-- Here is an example of a match: it gives back the substring, the inductive
-- proof that it is accepted by the regular expression and its precise location
-- inside the input string
mariamAGDAlena : Match "Maria Magdalena" infixAgda
mariamAGDAlena = record
{ string = "agda" -- we have found "agda"
; match = from-yes ("agda" ∈? agda) -- a proof of the match
; related = proof -- and its location
}
where
proof : Infix _≡_ (toList "agda") (toList "Maria Magdalena")
proof = toList "Maria M"
++ⁱ fromPointwise (≡⇒Pointwise-≡ refl)
ⁱ++ toList "lena"
-- And here is the proof that search returns such an object
_ : search "Maria Magdalena" infixAgda ≡ yes mariamAGDAlena
_ = refl