Skip to content

Commit

Permalink
Add full Unicode support
Browse files Browse the repository at this point in the history
`dconf dump` output includes Unicode characters, and the form in which dconf2nix
exported them, such as `\129315`, means nothing to Nix.

> The functions that deal with GVariant text format absolutely always deal in UTF-8.
> Conceptually, GVariant text format is a string of Unicode characters, not bytes.
> Non-ASCII but otherwise printable Unicode characters are not treated any differently from normal ASCII characters.

Let’s ensure they are printed as such.

Also fix the test data from e2b5065:
it was copied as reported by `parserTraced`, but the actual data
was mostly Unicode with a few escape sequences.
  • Loading branch information
jtojnar committed Apr 14, 2024
1 parent 217a6ee commit 7f5673d
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 19 deletions.
2 changes: 1 addition & 1 deletion data/dconf.settings
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ locations=[<(uint32 2, <('Gdańsk', 'EPGD', true, [(0.94916821905848536, 0.32230
region='en_US.UTF-8'

[issue28/desktop/ibus/panel/emoji]
favorites=['\8211', '\8594', '\8593', '\8595', '\8482', '\\u00ad', '\176', '\\v', '\160', '\171', '\8451']
favorites=['', '', '', '', '', '\u00ad', '°', '\v', ' ', '«', '']

[issue28/org/gnome/desktop/input-sources]
mru-sources=[('xkb', 'us+altgr-intl'), ('ibus', 'mozc-jp')]
Expand Down
3 changes: 3 additions & 0 deletions data/unicode.settings
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[org/gnome/font-manager]
compare-preview-text='🤣'
preview-text='příliš žluťoučký kůň úpěl ďábelské ódy\nprilis zlutoucky kun upel dabelske ody\n◌̍◌̍ff̍ ̍čěů\n◌̎\n'
6 changes: 3 additions & 3 deletions output/dconf.nix
Original file line number Diff line number Diff line change
Expand Up @@ -361,20 +361,20 @@ with lib.hm.gvariant;
};

"org/gnome/Weather" = {
locations = [ (mkVariant [ (mkUint32 2) (mkVariant [ "Gda\324sk" "EPGD" true [ (mkTuple [ 0.9491682190584854 0.3223041410193837 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Dzia\322dowo County, Warmian-Masurian Voivodeship" "" false [ (mkTuple [ 0.9302794944578734 0.34699627038777753 ]) ] [ (mkTuple [ 0.938610530426954 0.3574455077502486 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Pomeranian Voivodeship" "" false [ (mkTuple [ 0.9514923902475622 0.3235888220312407 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) ];
locations = [ (mkVariant [ (mkUint32 2) (mkVariant [ "Gdańsk" "EPGD" true [ (mkTuple [ 0.9491682190584854 0.3223041410193837 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Działdowo County, Warmian-Masurian Voivodeship" "" false [ (mkTuple [ 0.9302794944578734 0.34699627038777753 ]) ] [ (mkTuple [ 0.938610530426954 0.3574455077502486 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Pomeranian Voivodeship" "" false [ (mkTuple [ 0.9514923902475622 0.3235888220312407 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) ];
};

"org/gnome/shell/weather" = {
automatic-location = true;
locations = [ (mkVariant [ (mkUint32 2) (mkVariant [ "Gda\324sk" "EPGD" true [ (mkTuple [ 0.9491682190584854 0.3223041410193837 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Dzia\322dowo County, Warmian-Masurian Voivodeship" "" false [ (mkTuple [ 0.9302794944578734 0.34699627038777753 ]) ] [ (mkTuple [ 0.938610530426954 0.3574455077502486 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Pomeranian Voivodeship" "" false [ (mkTuple [ 0.9514923902475622 0.3235888220312407 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) ];
locations = [ (mkVariant [ (mkUint32 2) (mkVariant [ "Gdańsk" "EPGD" true [ (mkTuple [ 0.9491682190584854 0.3223041410193837 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Działdowo County, Warmian-Masurian Voivodeship" "" false [ (mkTuple [ 0.9302794944578734 0.34699627038777753 ]) ] [ (mkTuple [ 0.938610530426954 0.3574455077502486 ]) ] ]) ]) (mkVariant [ (mkUint32 2) (mkVariant [ "Gdynia, Pomeranian Voivodeship" "" false [ (mkTuple [ 0.9514923902475622 0.3235888220312407 ]) ] [ (mkTuple [ 0.9485864484589182 0.32579479952337237 ]) ] ]) ]) ];
};

"system/locale" = {
region = "en_US.UTF-8";
};

"issue28/desktop/ibus/panel/emoji" = {
favorites = [ "\137" "\396" "\395" "\397" "\322" "\\u00ad" "\176" "\\v" "\160" "\171" "\297" ];
favorites = [ "" "" "" "" "" "­" "°" "" " " "«" "" ];
};

"issue28/org/gnome/desktop/input-sources" = {
Expand Down
14 changes: 14 additions & 0 deletions output/unicode.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Generated via dconf2nix: https://github.com/gvolpe/dconf2nix
{ lib, ... }:

with lib.hm.gvariant;

{
dconf.settings = {
"org/gnome/font-manager" = {
compare-preview-text = "🤣";
preview-text = "příliš žluťoučký kůň úpěl ďábelské ódy\nprilis zlutoucky kun upel dabelske ody\n◌̍◌̍ff̍ ̍čěů\n◌̎\n";
};

};
}
50 changes: 36 additions & 14 deletions src/DConf.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ module DConf
)
where

import Control.Monad ( replicateM )
import Data.Ix ( inRange )
import qualified Data.Map as Map
import Data.Maybe ( catMaybes )
import Data.Text ( Text )
import qualified Data.Text as T
import DConf.Data
Expand Down Expand Up @@ -66,24 +69,43 @@ vString = T.pack <$> (single <|> double)
where
single = bracket "'" "'" $ inputs "'"
double = bracket "\"" "\"" $ inputs "\""

-- | Literal (unescaped) string character: hands 'charExcept' the characters
-- CR and LF plus everything in @extra@ (presumably 'charExcept' accepts any
-- character not in that list — confirm against its definition).
lchar :: [Char] -> Parsec Text () Char
lchar extra = charExcept $ "\r\n" <> extra
-- Numeric escape made of three digits. An optional leading '8' switches the
-- digits from base 10 to base 8; the resulting number is converted to a
-- character with 'chr'.
octal = do
isOctal <- optionMaybe $ char '8'
let
base = case isOctal of
Just _ -> 8
Nothing -> 10
a <- digit
b <- digit
c <- digit
return $ chr $ (digitToInt a * base * base) + (digitToInt b * base) + digitToInt c
-- NOTE(review): this signature has no equation directly below it, and a
-- second signature for qchar with result type @Maybe Char@ appears further
-- down — confirm which one is meant to remain.
qchar :: Parsec Text () Char
-- NOTE(review): second equation for lchar, separated from the first by the
-- definitions above; it additionally puts the backslash in the excluded set.
lchar extra = charExcept $ "\r\n\\" <> extra

-- | Numeric value (0 to 15) of a single hexadecimal digit; upper- and
-- lower-case letters are both accepted. Any other character raises an
-- 'error'.
fromHexDigit :: Char -> Int
fromHexDigit c
  | inRange ('A', 'F') c = letterValue 'A'
  | inRange ('a', 'f') c = letterValue 'a'
  | inRange ('0', '9') c = ord c - ord '0'
  | otherwise = error $ "Expected a hexadecimal digit, '" ++ c : "' given"
  where
    -- Offset of the letter from the start of its run, shifted past 0..9.
    letterValue start = ord c - ord start + 0xA

-- | Parse exactly @l@ hexadecimal digits and return the number they spell,
-- most significant digit first.
hexNum :: Int -> Parsec Text () Int
hexNum l = foldl appendDigit 0 <$> replicateM l (fromHexDigit <$> hexDigit)
  where
    -- Shift the digits read so far one hex place left and add the next one.
    appendDigit total d = 16 * total + d

-- | Parse one backslash escape inside a quoted string.
--
-- Yields @Just c@ when the escape denotes the character @c@, and @Nothing@
-- for a backslash followed by a newline, which contributes nothing to the
-- string. Fails (as a parse error) when a Unicode escape names a number
-- that is not a valid code point.
qchar :: Parsec Text () (Maybe Char)
qchar = do
  _ <- char '\\'
  -- Unicode escapes of the form `\uxxxx` and `\Uxxxxxxxx` are supported, in hexadecimal.
  (char 'u' *> (Just <$> codePoint 4))
    <|> (char 'U' *> (Just <$> codePoint 8))
    -- The usual control sequence escapes `\a`, `\b`, `\f`, `\n`, `\r`, `\t` and `\v` are supported.
    <|> (Just '\a' <$ char 'a')
    <|> (Just '\b' <$ char 'b')
    <|> (Just '\f' <$ char 'f')
    <|> (Just '\n' <$ char 'n')
    <|> (Just '\r' <$ char 'r')
    <|> (Just '\t' <$ char 't')
    <|> (Just '\v' <$ char 'v')
    -- Additionally, a `\` before a newline character causes the newline to be ignored.
    <|> (Nothing <$ char '\n')
    -- Finally, any other character following `\` is copied literally (for example, `\"` or `\\`)
    <|> (Just <$> anyChar)
  where
    -- 'chr' raises an exception for arguments above the largest code point,
    -- which eight hex digits can easily exceed; check the range first so
    -- that such input is reported as an ordinary parse failure.
    codePoint :: Int -> Parsec Text () Char
    codePoint width = do
      n <- hexNum width
      if n <= ord maxBound
        then pure (chr n)
        else fail "Unicode escape is outside the valid code point range"

-- | Body of a quoted string: a run of backslash escapes ('qchar') and
-- literal characters ('lchar', which is given @extra@ as additional
-- characters to reject). Escapes that yield no character are dropped from
-- the result.
inputs :: [Char] -> Parsec Text () String
inputs extra = catMaybes <$> many (qchar <|> (Just <$> lchar extra))

value :: Parsec Text () Value
value = choice
Expand Down
13 changes: 12 additions & 1 deletion src/Nix.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module Nix
)
where

import Data.Function ( (&) )
import qualified Data.Map as Map
import qualified Data.Text as T
import DConf.Data
Expand Down Expand Up @@ -68,7 +69,7 @@ renderValue raw = Nix $ renderValue' raw <> ";"
-- Render every element with renderItem, separate the results with single
-- spaces and wrap them in "[ " … " ]".
renderList xs = "[ " <> spaced <> " ]"
  where
    spaced = T.intercalate " " (fmap renderItem xs)

renderValue' (S v) = T.pack $ show v
renderValue' (S v) = renderString v
renderValue' (B v) = T.toLower . T.pack $ show v
renderValue' (I v) = T.pack $ show v
renderValue' (D v) = T.pack $ show v
Expand All @@ -81,3 +82,13 @@ renderValue raw = Nix $ renderValue' raw <> ";"
"''\n" <> mkSpaces 8 <> T.strip v <> "\n" <> mkSpaces 6 <> "''"
renderValue' (R kvs) =
"{\n" <> mconcat (fmap (\(k,v) -> mkSpaces 8 <> k <> " = " <> renderValue' v <> ";\n") kvs) <> mkSpaces 6 <> "}"

-- | Render text as a double-quoted Nix string literal.
--
-- Characters are emitted verbatim (including non-ASCII ones) except for
-- those that would change the meaning of the literal: backslash, newline,
-- carriage return, dollar sign and double quote are backslash-escaped.
renderString :: T.Text -> T.Text
renderString text = "\"" <> escaped <> "\""
  where
    escaped =
      text
        -- Backslashes go first so that the backslashes introduced by the
        -- replacements below are not doubled.
        & T.replace "\\" "\\\\"
        & T.replace "\n" "\\n"
        -- The Nix lexer normalises a raw CR (or CR LF) inside a string
        -- literal to LF, so a carriage return only survives as `\r`.
        & T.replace "\r" "\\r"
        & T.replace "$" "\\$"
        & T.replace "\"" "\\\""
11 changes: 11 additions & 0 deletions test/DConf2NixTest.hs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,17 @@ prop_dconf2nix_tuples :: Property
prop_dconf2nix_tuples =
withTests (10 :: TestLimit) dconf2nixTuples

-- | Property for the Unicode fixture: hands @data/unicode.settings@,
-- @output/unicode.nix@ and an empty root to 'baseProperty' (presumably the
-- shared input-versus-expected-output check — confirm against its definition).
dconf2nixUnicode :: Property
dconf2nixUnicode =
  baseProperty "data/unicode.settings" "output/unicode.nix" (Root T.empty)

-- | 'dconf2nixUnicode' with the test limit set to 10.
prop_dconf2nix_unicode :: Property
prop_dconf2nix_unicode = withTests limit dconf2nixUnicode
  where
    limit = 10 :: TestLimit

dconf2nixEmoji :: Property
dconf2nixEmoji =
let input = "data/emoji.settings"
Expand Down

0 comments on commit 7f5673d

Please sign in to comment.