1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
{-# LANGUAGE OverloadedStrings #-}
{-|
Module      : Text.Emoji.DataFiles
Description : Parsers for the Unicode @emoji-test.txt@ data file.
-}
module Text.Emoji.DataFiles where
import Prelude hiding (takeWhile)
import Text.Emoji.Types
import Control.Applicative ((<|>))
import Data.Attoparsec.Text (Parser (..), takeWhile1, takeWhile, string, choice
, notInClass, skipWhile, skipMany, isHorizontalSpace
, decimal, hexadecimal, char, many1, endOfLine)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Word (Word32)
-- | A parsed test file: the sequence of top-level entries produced by
-- 'emojiTestFile'.
type EmojiTest = [EmojiTestEntry]
-- | One item of a parsed test file.
data EmojiTestEntry
  -- | A group header: its level, its name, and the entries nested under it.
  = Group EmojiTestGroupLevel Text [EmojiTestEntry]
  -- | A data line: code points, status, emoji version and short name.
  | Entry [Word32] EmojiStatus EmojiVersion Text
  -- | A comment line; holds the text following the leading @#@ and any
  -- horizontal whitespace.
  | Comment Text
  deriving (Eq, Ord, Show)
-- | Nesting level of a 'Group' header.
--
-- The constructor order is significant for the derived 'Ord' and 'Enum'
-- instances.
data EmojiTestGroupLevel
  -- | Header introduced by the keyword @group@.
  = EmojiTestGroup
  -- | Header introduced by the keyword @subgroup@.
  | EmojiTestSubgroup
  deriving (Eq, Ord, Enum, Show)
-- | Keyword that introduces a header of the given level in the data file.
groupLevelText :: EmojiTestGroupLevel -> Text
groupLevelText level = case level of
  EmojiTestGroup    -> "group"
  EmojiTestSubgroup -> "subgroup"
-- | 'True' for every character except a space or a horizontal tab.
notSpace :: Char -> Bool
notSpace c = c /= ' ' && c /= '\t'
-- | 'True' for every character except a line feed.
notEol :: Char -> Bool
notEol = (/= '\n')
-- | Consume zero or more horizontal whitespace characters, discarding them.
skipSpace :: Parser ()
skipSpace = () <$ takeWhile isHorizontalSpace
-- | Parse the code point column: one or more hexadecimal numbers, each
-- immediately followed by a single space.
codePointsColumn :: Parser [Word32]
codePointsColumn = many1 codePoint
  where
    codePoint :: Parser Word32
    codePoint = hexadecimal <* char ' '
-- | Parse the status column, mapping each recognised keyword to its
-- 'EmojiStatus' value. Alternatives are tried in the order listed.
statusColumn :: Parser EmojiStatus
statusColumn =
      keyword "fully-qualified" (EmojiStatusCharacter FullyQualified)
  <|> keyword "minimally-qualified" (EmojiStatusCharacter MinimallyQualified)
  <|> keyword "unqualified" (EmojiStatusCharacter Unqualified)
  <|> keyword "component" EmojiStatusComponent
  where
    -- Match the literal keyword and yield the associated status.
    keyword :: Text -> EmojiStatus -> Parser EmojiStatus
    keyword label status = status <$ string label
-- | Parse a group header line of the requested level (@# group: NAME@ or
-- @# subgroup: NAME@) together with the entries that follow it.
--
-- A top-level group may contain nested subgroups, entry lines and comment
-- lines; a subgroup may contain only entry lines and comment lines. At least
-- one contained entry is required. The resulting 'Group' is tagged with the
-- level that was requested.
emojiTestGroup :: EmojiTestGroupLevel -> Parser EmojiTestEntry
emojiTestGroup maxLevel = do
  _ <- char '#'
  skipSpace
  _ <- string $ groupLevelText maxLevel
  _ <- char ':'
  skipSpace
  name <- takeWhile1 notEol
  skipMany endOfLine
  -- Only top-level groups are allowed to nest subgroups.
  let groupParser =
        if maxLevel == EmojiTestGroup
          then [ emojiTestGroup EmojiTestSubgroup ]
          else []
  groupEntries <- many1 . choice $
    groupParser ++ [ emojiTestEntryLine, emojiTestCommentLine ]
  -- Record the level actually parsed; previously every result was labelled
  -- 'EmojiTestGroup', which mislabelled subgroups.
  pure $ Group maxLevel name groupEntries
-- | Parse a version tag of the form @E\<major\>.\<minor\>@.
--
-- A major version of zero denotes a pre-emoji character: @E0.0@ carries no
-- version at all, while @E0.x@ keeps @x@ as the Unicode version.
emojiVersionColumn :: Parser EmojiVersion
emojiVersionColumn =
  classify <$> (char 'E' *> decimal) <*> (char '.' *> decimal)
  where
    -- E0.0: pre emoji without specific Unicode version
    classify 0 0 = NoEmojiVersion Nothing
    -- E0.x: pre emoji with Unicode version
    classify 0 unicodeVersion = NoEmojiVersion (Just unicodeVersion)
    -- Ex.y: regular emoji version
    classify major minor = EmojiVersion major minor
-- | Parse one data line into an 'Entry'.
--
-- The columns are read in order: code points, @"; "@, status, @"# "@, then
-- everything up to the first @E@ is skipped before the version tag, and the
-- rest of the line is taken as the short name. Trailing line breaks are
-- consumed.
emojiTestEntryLine :: Parser EmojiTestEntry
emojiTestEntryLine =
  Entry
    <$> (codePointsColumn <* skipSpace <* string "; ")
    <*> (statusColumn <* skipSpace <* string "# ")
    <*> (skipWhile (notInClass "E") *> emojiVersionColumn <* skipSpace)
    <*> (takeWhile1 notEol <* skipMany endOfLine)
-- | Parse a @#@ comment line into a 'Comment', consuming trailing line
-- breaks.
--
-- Lines whose text starts with @group:@ or @subgroup:@ are rejected so that
-- group headers are not mistaken for ordinary comments.
emojiTestCommentLine :: Parser EmojiTestEntry
emojiTestCommentLine = do
  body <- char '#' *> skipSpace *> takeWhile notEol
  skipMany endOfLine
  if any (`T.isPrefixOf` body) ["group:", "subgroup:"]
    then fail "group, not comment"
    else pure (Comment body)
-- | Parse a whole test file as one or more top-level items: a group, a data
-- line, or a comment line, tried in that order.
emojiTestFile :: Parser EmojiTest
emojiTestFile = many1 topLevelItem
  where
    topLevelItem =
          emojiTestGroup EmojiTestGroup
      <|> emojiTestEntryLine
      <|> emojiTestCommentLine
-- | Count the lines represented by a parsed 'EmojiTest'.
--
-- Every group header, data entry and comment counts as one line; the
-- contents of a group are counted recursively. Useful as a sanity check
-- against the line count of @emoji-test.txt@.
countLines :: EmojiTest -> Integer
countLines = sum . map linesOf
  where
    -- A group contributes its header line plus everything nested in it.
    linesOf (Group _ _ nested) = 1 + countLines nested
    linesOf (Comment _)        = 1
    linesOf (Entry _ _ _ _)    = 1
|