diff options
author | sternenseemann <git@lukasepple.de> | 2020-04-12 21:03:41 +0200 |
---|---|---|
committer | sternenseemann <git@lukasepple.de> | 2020-04-12 21:03:41 +0200 |
commit | e81d77b968bbe0b514352606fea163c0fae4668e (patch) | |
tree | 48f5bf913fb5f1e8384ef855c718b680eacd0db9 | |
parent | beeb3420354b56dff691a8f4b4f54bb1b683adad (diff) |
add parser for emoji-test.txt
-rw-r--r-- | emoji-generic.cabal | 6 | ||||
-rw-r--r-- | emoji-generic.nix | 8 | ||||
-rw-r--r-- | src/Text/Emoji/DataFiles.hs | 126 | ||||
-rw-r--r-- | test/Main.hs | 32 |
4 files changed, 167 insertions, 5 deletions
diff --git a/emoji-generic.cabal b/emoji-generic.cabal index 96d084d..23362c0 100644 --- a/emoji-generic.cabal +++ b/emoji-generic.cabal @@ -17,13 +17,14 @@ extra-source-files: README.md library exposed-modules: Text.Emoji + , Text.Emoji.DataFiles , Text.Emoji.Types , Text.Emoji.String , Text.Emoji.Text -- other-modules: -- other-extensions: build-depends: base >=4.13 && <4.14 - , text + , text ^>=1.2.4.0 , attoparsec ^>=0.13.2.0 , file-embed , utf8-light ^>=0.4.2 @@ -40,4 +41,7 @@ test-suite test , tasty ^>=1.2.3 , tasty-smallcheck ^>=0.8.1 , smallcheck ^>=1.1.5 + , tasty-hunit ^>=0.10.0.2 + , attoparsec ^>=0.13.2.0 + , text ^>=1.2.4.0 default-language: Haskell2010 diff --git a/emoji-generic.nix b/emoji-generic.nix index 4da50d5..c187749 100644 --- a/emoji-generic.nix +++ b/emoji-generic.nix @@ -1,5 +1,5 @@ -{ mkDerivation, attoparsec, base, file-embed, stdenv, tasty -, tasty-smallcheck, text, utf8-light +{ mkDerivation, attoparsec, base, file-embed, smallcheck, stdenv +, tasty, tasty-hunit, tasty-smallcheck, text, utf8-light }: mkDerivation { pname = "emoji-generic"; @@ -8,7 +8,9 @@ mkDerivation { libraryHaskellDepends = [ attoparsec base file-embed text utf8-light ]; - testHaskellDepends = [ tasty tasty-smallcheck ]; + testHaskellDepends = [ + base smallcheck tasty tasty-hunit tasty-smallcheck + ]; description = "A generic Emoji library"; license = stdenv.lib.licenses.lgpl3; } diff --git a/src/Text/Emoji/DataFiles.hs b/src/Text/Emoji/DataFiles.hs new file mode 100644 index 0000000..0e2755c --- /dev/null +++ b/src/Text/Emoji/DataFiles.hs @@ -0,0 +1,126 @@ +{-# LANGUAGE OverloadedStrings #-} +{-| + Module: Text.Emoji.Data + Description: +-} + + +module Text.Emoji.DataFiles where + +import Prelude hiding (takeWhile) + +import Text.Emoji.Types + +import Control.Applicative ((<|>)) +import Data.Attoparsec.Text (Parser (..), takeWhile1, takeWhile, string + , notInClass, skipWhile, skipMany, isHorizontalSpace + , decimal, hexadecimal, char, many1, endOfLine) +import Data.Text (Text) +import Data.Word (Word32) + +type EmojiTest = [EmojiTestEntry] + +data EmojiTestEntry + = Group EmojiTestGroupLevel Text [EmojiTestEntry] + | Entry [Word32] EmojiStatus EmojiVersion Text + | Comment Text + deriving (Show, Eq, Ord) + +data EmojiTestGroupLevel + = EmojiTestGroup + | EmojiTestSubgroup + deriving (Show, Eq, Ord, Enum) + +groupLevelText :: EmojiTestGroupLevel -> Text +groupLevelText EmojiTestGroup = "group" +groupLevelText EmojiTestSubgroup = "subgroup" + +notSpace :: Char -> Bool +notSpace = notInClass " \t" + +notEol :: Char -> Bool +notEol = notInClass "\n" + +skipSpace :: Parser () +skipSpace = skipWhile isHorizontalSpace + +codePointsColumn :: Parser [Word32] +codePointsColumn = many1 (hexadecimal <* char ' ') + +statusColumn :: Parser EmojiStatus +statusColumn = + (string "fully-qualified" >> pure (EmojiStatusCharacter FullyQualified)) <|> + (string "minimally-qualified" >> pure (EmojiStatusCharacter MinimallyQualified)) <|> + (string "unqualified" >> pure (EmojiStatusCharacter Unqualified)) <|> + (string "component" >> pure (EmojiStatusComponent)) + +emojiTestGroup :: EmojiTestGroupLevel -> Parser EmojiTestEntry +emojiTestGroup maxLevel = do + char '#' + skipSpace + + string $ groupLevelText maxLevel + char ':' + skipSpace + + name <- takeWhile1 notEol + skipMany endOfLine + + let addMiddleParser = if maxLevel == EmojiTestGroup + then (<|> emojiTestGroup EmojiTestSubgroup) + else id + groupEntries <- many1 + (addMiddleParser emojiTestEntryLine <|> emojiTestCommentLine) + + pure $ Group EmojiTestGroup name groupEntries + +emojiVersionColumn :: Parser EmojiVersion +emojiVersionColumn = do + char 'E' + major <- decimal + char '.' + minor <- decimal + pure $ case major of + 0 -> case minor of + -- E0.0: pre emoji without specific Unicode Version + 0 -> NoEmojiVersion Nothing + -- E0.x: Pre emoji with Unicode Version + _ -> NoEmojiVersion (Just minor) + -- Ex.y: Regular Emoji Version + _ -> EmojiVersion major minor + +emojiTestEntryLine :: Parser EmojiTestEntry +emojiTestEntryLine = do + codePoints <- codePointsColumn + skipSpace + + string "; " + status <- statusColumn + skipSpace + + string "# " + skipWhile (notInClass "E") + version <- emojiVersionColumn + skipSpace + + shortName <- takeWhile1 notEol + skipMany endOfLine + + pure $ Entry codePoints status version shortName + +emojiTestCommentLine :: Parser EmojiTestEntry +emojiTestCommentLine = char '#' >> skipSpace >> + (((string "group:" <|> string "subgroup:") >> fail "group, not comment") <|> + (takeWhile notEol <* skipMany endOfLine) >>= pure . Comment) + +emojiTestFile :: Parser EmojiTest +emojiTestFile = many1 $ + emojiTestGroup EmojiTestGroup <|> emojiTestEntryLine <|> emojiTestCommentLine + +-- | Helper Function that counts number of lines used to parse 'EmojiTest'. +-- Useful to check against LoC of @emoji-test.txt@ for parser sanity check. +countLines :: EmojiTest -> Integer +countLines ((Group _ _ x):xs) = 1 + countLines x + countLines xs +countLines ((Comment _):xs) = 1 + countLines xs +countLines ((Entry _ _ _ _):xs) = 1 + countLines xs +countLines [] = 0 diff --git a/test/Main.hs b/test/Main.hs index 9493099..3706de8 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,17 +1,25 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleInstances, MultiParamTypeClasses #-} module Main where import Test.Tasty import qualified Test.Tasty.SmallCheck as SC +import qualified Test.Tasty.HUnit as HU import Test.SmallCheck.Series import Text.Emoji.Types +import Text.Emoji.DataFiles + +import Data.Attoparsec.Text (parseOnly) +import Data.Either (fromRight) +import Data.Text (Text) +import qualified Data.Text as T (unpack) main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "Tests" [typesTests] +tests = testGroup "Tests" [typesTests, parserTests] typesTests :: TestTree typesTests = testGroup "Emoji Types Tests" @@ -26,3 +34,25 @@ emojiVersionLowestValue :: EmojiVersion -> Bool emojiVersionLowestValue v = NoEmojiVersion Nothing <= v instance Monad m => Serial m EmojiVersion + +parserTests :: TestTree +parserTests = testGroup "Emoji Data File Parser Tests" + [ HU.testCase "EmojiVersion Parser parses all versions up till now correctly" emojiVersionParserTest ] + +emojiVersionParserTest :: HU.Assertion +emojiVersionParserTest = do + vt "E0.0" (NoEmojiVersion Nothing) + vt "E0.6" (NoEmojiVersion (Just 6)) + vt "E0.7" (NoEmojiVersion (Just 7)) + vt "E1.0" (EmojiVersion 1 0) + vt "E2.0" (EmojiVersion 2 0) + vt "E3.0" (EmojiVersion 3 0) + vt "E4.0" (EmojiVersion 4 0) + vt "E5.0" (EmojiVersion 5 0) + vt "E11.0" (EmojiVersion 11 0) + vt "E12.0" (EmojiVersion 12 0) + vt "E12.1" (EmojiVersion 12 1) + vt "E13.0" (EmojiVersion 13 0) + where vt :: Text -> EmojiVersion -> HU.Assertion + vt str exp = HU.assertBool (T.unpack str) . fromRight False $ + (== exp) <$> parseOnly emojiVersionColumn str |