Scala: Haskell 本の引き写しでパーサジェネレータ

プログラミング Haskell」の第 8 章「関数型パーサ」の写経をしようかと思ったのですが、そのまま引き写しても面白くないので、Scala で書いてみました。せっかくですので、Haskell のモナド連結の糖衣構文 do を、極力そのまま Scala の for 式で書けるよう、Parser[A] には、map(), flatMap() を備え、Scala の型とします。

…できた。Haskell 版から変えたところとしては、入力文字列にパーサを適用した際には、Scala らしく Option[(A, String)] を返すようにした、Haskell で [Char] であるところは、適宜 String にした、程度か。案外そのまま行けますね、というよりも、Scala が Haskell を色濃く反映しているんですね。

abstract trait Parser[A] {
  def apply(inp: String): Option[(A, String)]
  def parse(inp: String) = apply(inp)
  def map[B](f: (A) => B) = {
    def p_parse = parse _
    new Parser[B] {
      def apply(inp: String) = {
        p_parse(inp) match {
          case None => None
          case Some((v, out)) => Some((f(v), out))
        }
      }
    }
  }
  def flatMap[B](f: (A) => Parser[B]): Parser[B] = {
    def p_parse = parse _
    new Parser[B] {
      def apply(inp: String) = {
        p_parse(inp) match {
          case None => None
          case Some((v, out)) => f(v)(out)
        }
      }
    }
  }
  def +++(q: Parser[A]) = {
    def p_parse = parse _
    new Parser[A] {
      def apply(inp: String) = {
        p_parse(inp) match {
          case None => q.parse(inp)
          case s @ Some(_) => s
        }
      }
    }
  }
}

def return_[A](v: A): Parser[A] = {
  new Parser[A] { def apply(inp: String) = { Some((v, inp)) } }
}

def failure[A]: Parser[A] = {
  new Parser[A] { def apply(inp: String) = { None } }
}

def item: Parser[Char] = new Parser[Char] {
  def apply(inp: String) = inp match {
    case "" => None
    case _ => Some((inp.head, inp.tail))
  }
}

def parse[A](p: Parser[A], inp: String): Option[(A, String)] = p(inp)

/*

parse(return_(1), "abc")
res1: Option[(Int, String)] = Some((1,abc))

parse(failure, "abc")
res2: Option[(Nothing, String)] = None

parse(item, "abc")
res3: Option[(Char, String)] = Some((a,bc))

 */

/*

def p = for {
  x <- item
  _ <- item
  y <- item
} yield (x, y)
p: Parser[(Char, Char)]

def p_dash = item.flatMap(x =>
  item.flatMap(__ =>
    item.map(y =>
      (x, y) )))
p_dash: Parser[(Char, Char)]

parse(p, "abcdef")
res4: Option[((Char, Char), String)] = Some(((a,c),def))

parse(p, "ab")
res5: Option[((Char, Char), String)] = None

 */

/*

parse(item +++ return_('d'), "abc")
res6: Option[(Char, String)] = Some((a,bc))

parse(failure +++ return_('d'), "abc")
res7: Option[(Char, String)] = Some((d,abc))

parse(failure +++ failure, "abc")
res8: Option[(Nothing, String)] = None

 */

def sat(p: (Char) => Boolean): Parser[Char] = for {
  x <- item
  r <- if (p(x)) return_(x) else failure
} yield (r)

def digit = sat(_.isDigit)

def lower = sat(_.isLower)

def upper = sat(_.isUpper)

def letter = sat(_.isLetter)

def alphanum = letter +++ digit

def char(x: Char) = sat(x.==)

/*

parse(digit, "123")
res9: Option[(Char, String)] = Some((1,23))

parse(digit, "abc")
res10: Option[(Char, String)] = None

parse(char('a'), "abc")
res11: Option[(Char, String)] = Some((a,bc))

parse(char('a'), "123")
res12: Option[(Char, String)] = None

 */

// これは文字列を返すパーサとする。Scala の String は List[Char] ではない
def string(s: String): Parser[String] = s match {
  case "" => return_("")
  case _ => for {
    x <- char(s.head)
    xs <- string(s.tail)
  } yield (x +: xs)
}

/*

parse(string("abc"), "abcdef")
res13: Option[(String, String)] = Some((abc,def))

parse(string("abc"), "ab1234")
res14: Option[(String, String)] = None

 */

object Many {
  def many[A](p: Parser[A]): Parser[List[A]] = {
    many1(p) +++ return_(Nil)
  }
  def many1[A](p: Parser[A]): Parser[List[A]] = {
    for {
      v <- p
      vs <- many(p)
    } yield (v :: vs)
  }
}
import Many._

/*

parse(many(digit), "123abc")
res15: Option[(List[Char], String)] = Some((List(1, 2, 3),abc))

parse(many(digit), "abcdef")
res16: Option[(List[Char], String)] = Some((List(),abcdef))

parse(many1(digit), "abcdef")
res17: Option[(List[Char], String)] = None

 */

// 仕方ないので文字列にしておく
def ident = for {
  x <- lower
  xs <- many(alphanum)
} yield ((x :: xs).foldLeft("")(_ + _))

// これも
def nat: Parser[Int] = for {
  xs <- many1(digit)
} yield (xs.foldLeft("")(_ + _).toInt)

def space: Parser[Unit] = for {
  _ <- many(sat(_.isWhitespace))
} yield (Unit)

/*

parse(ident, "abc def")
res0: Option[(java.lang.String, String)] = Some((abc, def))

parse(nat, "123 abc")
res1: Option[(Int, String)] = Some((123, abc))

parse(space, " abc")
res2: Option[(Unit, String)] = Some(((),abc))

 */

def token[A](p: Parser[A]): Parser[A] = for {
  _ <- space
  v <- p
  _ <- space
} yield (v)

def identifier: Parser[String] = token(ident)

def natural: Parser[Int] = token(nat)

def symbol(xs: String): Parser[String] = token(string(xs))

/*

def p: Parser[List[Int]] = for {
  _ <- symbol("[")
  n <- natural
  ns <- many(
    for {
      _ <- symbol(",")
      x <- natural
    } yield (x)
  )
  _ <- symbol("]")
} yield (n :: ns)

parse(p, " [1, 2, 3] ")
res19: Option[(List[Int], String)] = Some((List(1, 2, 3),))

parse(p, "[1, 2,]")
res20: Option[(List[Int], String)] = None

 */

def int: Parser[Int] = (
  for {
    _ <- symbol("-")
    n <- natural
  } yield (- n)
) +++ natural

object Expr {
  def expr: Parser[Int] = for {
    t <- term
    r <- (
      for {
        _ <- symbol("+")
        e <- expr
      } yield (t + e)
    ) +++ (
      for {
        _ <- symbol("-")
        e <- expr
      } yield (t - e)
    ) +++ return_(t)
  } yield (r)
  def term: Parser[Int] = for {
    f <- factor
    r <- (
      for {
        _ <- symbol("*")
        t <- term
      } yield (f * t)
    ) +++ (
      for {
        _ <- symbol("/")
        t <- term
      } yield (f / t)
    ) +++ return_(f)
  } yield (r)
  def factor: Parser[Int] = (
    for {
      _ <- symbol("(")
      e <- expr
      _ <- symbol(")")
    } yield (e)
  ) +++ int
}
import Expr._

/*

parse(expr, "2*3+4")
res26: Option[(Int, String)] = Some((10,))

parse(expr, "2*(3+4)")
res28: Option[(Int, String)] = Some((14,))

parse(expr, "2 * (3 + 4)")
res30: Option[(Int, String)] = Some((14,))

parse(expr, "2*3-4")
res33: Option[(Int, String)] = Some((2,))

parse(expr, "-1")
res36: Option[(Int, String)] = Some((-1,))

parse(expr, "(-3 * 4 * - 2) / 6")
res43: Option[(Int, String)] = Some((4,))

 */

Leave a Reply