JSON.stringify With Escape For Unicode-Characters

You may be familiar with PHP’s json_encode, which by default gives you an ASCII-safe version of Unicode content (if you actually want plain, unescaped Unicode output, you need to pass the JSON_UNESCAPED_UNICODE flag).

In your browser (or in Node.js, for that matter) JSON.stringify does not include a built-in option for this, but it DOES let you specify a “replacer” function, which transforms each value before it is serialized. Naturally, that leaves a lot more room to play around, but if the only thing you need is something similar to PHP’s json_encode, read ahead.

A nifty little trick I’m going to use next is a regular-expression replace that matches every character whose code is 160 (decimal) or above, up to 65535, which is the maximum value of a single UTF-16 code unit.
Although we could tolerate character values up to 255, those from 160 upward are generally not considered ASCII-safe, and the higher ranges include special control characters and text-direction controls which might break the text stream.

  • The usual “\u….” encoding:
    [
    {description: 'ascii'
    ,upper_case:  ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
    ,lower_case:  ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
    ,numbers:     ['0','1','2','3','4','5','6','7','8','9']
    }
    ,
    {description: 'MATHEMATICAL BOLD'
    ,upper_case:  ['𝐀','𝐁','𝐂','𝐃','𝐄','𝐅','𝐆','𝐇','𝐈','𝐉','𝐊','𝐋','𝐌','𝐍','𝐎','𝐏','𝐐','𝐑','𝐒','𝐓','𝐔','𝐕','𝐖','𝐗','𝐘','𝐙']
    ,lower_case:  ['𝐚','𝐛','𝐜','𝐝','𝐞','𝐟','𝐠','𝐡','𝐢','𝐣','𝐤','𝐥','𝐦','𝐧','𝐨','𝐩','𝐪','𝐫','𝐬','𝐭','𝐮','𝐯','𝐰','𝐱','𝐲','𝐳']
    ,numbers:     ['𝟎','𝟏','𝟐','𝟑','𝟒','𝟓','𝟔','𝟕','𝟖','𝟗']
    }
    ]
    

    Putting all of the data above (as an object) into a variable named a:

    /**
     * JSON.stringify replacer that rewrites every UTF-16 code unit in the
     * range U+00A0..U+FFFF of a string value as a literal backslash-u escape
     * (four lower-case hex digits, zero-padded).
     *
     * Characters outside the BMP are stored as two code units, so each one
     * yields two consecutive escapes (a surrogate pair).
     *
     * Note: JSON.stringify escapes the backslash inserted here, so the
     * serialized text contains a doubled backslash before each "u".
     * Note: code units below U+00A0 (including U+007F..U+009F) are returned
     * untouched, so the result is not guaranteed to be strictly 7-bit ASCII.
     *
     * @param {string} key   Property name supplied by JSON.stringify (unused).
     * @param {*}      value Value about to be serialized.
     * @returns {*} The escaped string, or `value` unchanged when it is not a string.
     */
    function replacer(key, value){
      if("string" !== typeof value) return value;
      // No "m" flag: the pattern has no ^/$ anchors, so it would have no effect.
      return value.replace(/[\u00A0-\uFFFF]/g, function(c){
        // Left-pad with zeros, then keep the last four hex digits
        // (slice instead of the deprecated substr).
        return "\\u" + ("0000" + c.charCodeAt(0).toString(16)).slice(-4);
      });
    }
    
    // Serialize `a`, passing every value through replacer, and print the JSON text.
    console.log(JSON.stringify(a, replacer));
    

    which will give you:

    [{"description":"ascii","upper_case":["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"],"lower_case":["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"],"numbers":["0","1","2","3","4","5","6","7","8","9"]},{"description":"MATHEMATICAL BOLD","upper_case":["\\ud835\\udc00","\\ud835\\udc01","\\ud835\\udc02","\\ud835\\udc03","\\ud835\\udc04","\\ud835\\udc05","\\ud835\\udc06","\\ud835\\udc07","\\ud835\\udc08","\\ud835\\udc09","\\ud835\\udc0a","\\ud835\\udc0b","\\ud835\\udc0c","\\ud835\\udc0d","\\ud835\\udc0e","\\ud835\\udc0f","\\ud835\\udc10","\\ud835\\udc11","\\ud835\\udc12","\\ud835\\udc13","\\ud835\\udc14","\\ud835\\udc15","\\ud835\\udc16","\\ud835\\udc17","\\ud835\\udc18","\\ud835\\udc19"],"lower_case":["\\ud835\\udc1a","\\ud835\\udc1b","\\ud835\\udc1c","\\ud835\\udc1d","\\ud835\\udc1e","\\ud835\\udc1f","\\ud835\\udc20","\\ud835\\udc21","\\ud835\\udc22","\\ud835\\udc23","\\ud835\\udc24","\\ud835\\udc25","\\ud835\\udc26","\\ud835\\udc27","\\ud835\\udc28","\\ud835\\udc29","\\ud835\\udc2a","\\ud835\\udc2b","\\ud835\\udc2c","\\ud835\\udc2d","\\ud835\\udc2e","\\ud835\\udc2f","\\ud835\\udc30","\\ud835\\udc31","\\ud835\\udc32","\\ud835\\udc33"],"numbers":["\\ud835\\udfce","\\ud835\\udfcf","\\ud835\\udfd0","\\ud835\\udfd1","\\ud835\\udfd2","\\ud835\\udfd3","\\ud835\\udfd4","\\ud835\\udfd5","\\ud835\\udfd6","\\ud835\\udfd7"]}]
    

    or beautified:

    [{
      "description":  "ascii"
      , "upper_case": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
      , "lower_case": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
      , "numbers":    ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    }, {
      "description":  "MATHEMATICAL BOLD"
      , "upper_case": ["\\ud835\\udc00", "\\ud835\\udc01", "\\ud835\\udc02", "\\ud835\\udc03", "\\ud835\\udc04", "\\ud835\\udc05", "\\ud835\\udc06", "\\ud835\\udc07", "\\ud835\\udc08", "\\ud835\\udc09", "\\ud835\\udc0a", "\\ud835\\udc0b", "\\ud835\\udc0c", "\\ud835\\udc0d", "\\ud835\\udc0e", "\\ud835\\udc0f", "\\ud835\\udc10", "\\ud835\\udc11", "\\ud835\\udc12", "\\ud835\\udc13", "\\ud835\\udc14", "\\ud835\\udc15", "\\ud835\\udc16", "\\ud835\\udc17", "\\ud835\\udc18", "\\ud835\\udc19"]
      , "lower_case": ["\\ud835\\udc1a", "\\ud835\\udc1b", "\\ud835\\udc1c", "\\ud835\\udc1d", "\\ud835\\udc1e", "\\ud835\\udc1f", "\\ud835\\udc20", "\\ud835\\udc21", "\\ud835\\udc22", "\\ud835\\udc23", "\\ud835\\udc24", "\\ud835\\udc25", "\\ud835\\udc26", "\\ud835\\udc27", "\\ud835\\udc28", "\\ud835\\udc29", "\\ud835\\udc2a", "\\ud835\\udc2b", "\\ud835\\udc2c", "\\ud835\\udc2d", "\\ud835\\udc2e", "\\ud835\\udc2f", "\\ud835\\udc30", "\\ud835\\udc31", "\\ud835\\udc32", "\\ud835\\udc33"]
      , "numbers":    ["\\ud835\\udfce", "\\ud835\\udfcf", "\\ud835\\udfd0", "\\ud835\\udfd1", "\\ud835\\udfd2", "\\ud835\\udfd3", "\\ud835\\udfd4", "\\ud835\\udfd5", "\\ud835\\udfd6", "\\ud835\\udfd7"]
    }]
    

    Which is truly an “ASCII-safe” version.

    You may post-process the result of JSON.stringify by replacing JSON.stringify(a, replacer) with JSON.stringify(a, replacer).replace(/\\\\u/g, "\\u"), which will give you this:

    [{"description":"ascii","upper_case":["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"],"lower_case":["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"],"numbers":["0","1","2","3","4","5","6","7","8","9"]},{"description":"MATHEMATICAL BOLD","upper_case":["\ud835\udc00","\ud835\udc01","\ud835\udc02","\ud835\udc03","\ud835\udc04","\ud835\udc05","\ud835\udc06","\ud835\udc07","\ud835\udc08","\ud835\udc09","\ud835\udc0a","\ud835\udc0b","\ud835\udc0c","\ud835\udc0d","\ud835\udc0e","\ud835\udc0f","\ud835\udc10","\ud835\udc11","\ud835\udc12","\ud835\udc13","\ud835\udc14","\ud835\udc15","\ud835\udc16","\ud835\udc17","\ud835\udc18","\ud835\udc19"],"lower_case":["\ud835\udc1a","\ud835\udc1b","\ud835\udc1c","\ud835\udc1d","\ud835\udc1e","\ud835\udc1f","\ud835\udc20","\ud835\udc21","\ud835\udc22","\ud835\udc23","\ud835\udc24","\ud835\udc25","\ud835\udc26","\ud835\udc27","\ud835\udc28","\ud835\udc29","\ud835\udc2a","\ud835\udc2b","\ud835\udc2c","\ud835\udc2d","\ud835\udc2e","\ud835\udc2f","\ud835\udc30","\ud835\udc31","\ud835\udc32","\ud835\udc33"],"numbers":["\ud835\udfce","\ud835\udfcf","\ud835\udfd0","\ud835\udfd1","\ud835\udfd2","\ud835\udfd3","\ud835\udfd4","\ud835\udfd5","\ud835\udfd6","\ud835\udfd7"]}]
    

    or beautified:

    [{
      "description":  "ascii"
      , "upper_case": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
      , "lower_case": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
      , "numbers":    ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    }, {
      "description":  "MATHEMATICAL BOLD"
      , "upper_case": ["\ud835\udc00", "\ud835\udc01", "\ud835\udc02", "\ud835\udc03", "\ud835\udc04", "\ud835\udc05", "\ud835\udc06", "\ud835\udc07", "\ud835\udc08", "\ud835\udc09", "\ud835\udc0a", "\ud835\udc0b", "\ud835\udc0c", "\ud835\udc0d", "\ud835\udc0e", "\ud835\udc0f", "\ud835\udc10", "\ud835\udc11", "\ud835\udc12", "\ud835\udc13", "\ud835\udc14", "\ud835\udc15", "\ud835\udc16", "\ud835\udc17", "\ud835\udc18", "\ud835\udc19"]
      , "lower_case": ["\ud835\udc1a", "\ud835\udc1b", "\ud835\udc1c", "\ud835\udc1d", "\ud835\udc1e", "\ud835\udc1f", "\ud835\udc20", "\ud835\udc21", "\ud835\udc22", "\ud835\udc23", "\ud835\udc24", "\ud835\udc25", "\ud835\udc26", "\ud835\udc27", "\ud835\udc28", "\ud835\udc29", "\ud835\udc2a", "\ud835\udc2b", "\ud835\udc2c", "\ud835\udc2d", "\ud835\udc2e", "\ud835\udc2f", "\ud835\udc30", "\ud835\udc31", "\ud835\udc32", "\ud835\udc33"]
      , "numbers":    ["\ud835\udfce", "\ud835\udfcf", "\ud835\udfd0", "\ud835\udfd1", "\ud835\udfd2", "\ud835\udfd3", "\ud835\udfd4", "\ud835\udfd5", "\ud835\udfd6", "\ud835\udfd7"]
    }]
    

    Which is probably what you’ve expected to have :]

    It is a good philosophical question whether the string is ASCII while its content is Unicode, since JavaScript tends to parse sequences that look like \uXXXX in string literals directly, on the fly, back to true Unicode.
    So if you pass it on to another application or to a textarea for display, it should probably still look fine;
    storing the second output in a variable (as a string literal) will likely give you the unescaped version.. :/

  • You can have it transform Unicode characters to HTML entities:
    /**
     * JSON.stringify replacer that rewrites characters of a string value as
     * decimal HTML numeric character references ("&#NNNN;").
     *
     * A surrogate pair (a character outside the BMP) is emitted as ONE
     * reference to the combined code point. A reference to an individual
     * surrogate code unit is not a valid HTML character reference, so emitting
     * the two halves separately would not display the original character.
     * Any other code unit in U+00A0..U+FFFF is emitted as its own reference;
     * this includes unpaired surrogates, which are passed through per code unit.
     * Code units below U+00A0 are left untouched.
     *
     * @param {string} key   Property name supplied by JSON.stringify (unused).
     * @param {*}      value Value about to be serialized.
     * @returns {*} The entity-encoded string, or `value` unchanged when it is not a string.
     */
    function replacer(key, value){
      if("string" !== typeof value) return value;
      // The surrogate-pair alternative comes first so a pair is consumed as a unit.
      return value.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u00A0-\uFFFF]/g, function(c){
        var code = c.charCodeAt(0);
        if(c.length === 2){
          // Combine high and low surrogate into the real code point (>= U+10000).
          code = (code - 0xD800) * 0x400 + (c.charCodeAt(1) - 0xDC00) + 0x10000;
        }
        return '&#' + code + ';';
      });
    }
    
    // Serialize `a` through replacer with two-space indentation and print the result.
    console.log(JSON.stringify(a, replacer, "  "));
    

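    which will give you (note: the sample output shown below contains %uXXXX-style percent-escapes rather than the &#NNNN; entities that the replacer above produces):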

    [{"description":"ascii","upper_case":["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"],"lower_case":["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"],"numbers":["0","1","2","3","4","5","6","7","8","9"]},{"description":"MATHEMATICAL%20BOLD","upper_case":["%uD835%uDC00","%uD835%uDC01","%uD835%uDC02","%uD835%uDC03","%uD835%uDC04","%uD835%uDC05","%uD835%uDC06","%uD835%uDC07","%uD835%uDC08","%uD835%uDC09","%uD835%uDC0A","%uD835%uDC0B","%uD835%uDC0C","%uD835%uDC0D","%uD835%uDC0E","%uD835%uDC0F","%uD835%uDC10","%uD835%uDC11","%uD835%uDC12","%uD835%uDC13","%uD835%uDC14","%uD835%uDC15","%uD835%uDC16","%uD835%uDC17","%uD835%uDC18","%uD835%uDC19"],"lower_case":["%uD835%uDC1A","%uD835%uDC1B","%uD835%uDC1C","%uD835%uDC1D","%uD835%uDC1E","%uD835%uDC1F","%uD835%uDC20","%uD835%uDC21","%uD835%uDC22","%uD835%uDC23","%uD835%uDC24","%uD835%uDC25","%uD835%uDC26","%uD835%uDC27","%uD835%uDC28","%uD835%uDC29","%uD835%uDC2A","%uD835%uDC2B","%uD835%uDC2C","%uD835%uDC2D","%uD835%uDC2E","%uD835%uDC2F","%uD835%uDC30","%uD835%uDC31","%uD835%uDC32","%uD835%uDC33"],"numbers":["%uD835%uDFCE","%uD835%uDFCF","%uD835%uDFD0","%uD835%uDFD1","%uD835%uDFD2","%uD835%uDFD3","%uD835%uDFD4","%uD835%uDFD5","%uD835%uDFD6","%uD835%uDFD7"]}]
    

    or beautified:

    [{
      "description":  "ascii"
      , "upper_case": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
      , "lower_case": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
      , "numbers":    ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    }, {
      "description":  "MATHEMATICAL%20BOLD"
      , "upper_case": ["%uD835%uDC00", "%uD835%uDC01", "%uD835%uDC02", "%uD835%uDC03", "%uD835%uDC04", "%uD835%uDC05", "%uD835%uDC06", "%uD835%uDC07", "%uD835%uDC08", "%uD835%uDC09", "%uD835%uDC0A", "%uD835%uDC0B", "%uD835%uDC0C", "%uD835%uDC0D", "%uD835%uDC0E", "%uD835%uDC0F", "%uD835%uDC10", "%uD835%uDC11", "%uD835%uDC12", "%uD835%uDC13", "%uD835%uDC14", "%uD835%uDC15", "%uD835%uDC16", "%uD835%uDC17", "%uD835%uDC18", "%uD835%uDC19"]
      , "lower_case": ["%uD835%uDC1A", "%uD835%uDC1B", "%uD835%uDC1C", "%uD835%uDC1D", "%uD835%uDC1E", "%uD835%uDC1F", "%uD835%uDC20", "%uD835%uDC21", "%uD835%uDC22", "%uD835%uDC23", "%uD835%uDC24", "%uD835%uDC25", "%uD835%uDC26", "%uD835%uDC27", "%uD835%uDC28", "%uD835%uDC29", "%uD835%uDC2A", "%uD835%uDC2B", "%uD835%uDC2C", "%uD835%uDC2D", "%uD835%uDC2E", "%uD835%uDC2F", "%uD835%uDC30", "%uD835%uDC31", "%uD835%uDC32", "%uD835%uDC33"]
      , "numbers":    ["%uD835%uDFCE", "%uD835%uDFCF", "%uD835%uDFD0", "%uD835%uDFD1", "%uD835%uDFD2", "%uD835%uDFD3", "%uD835%uDFD4", "%uD835%uDFD5", "%uD835%uDFD6", "%uD835%uDFD7"]
    }]
    

Happy escaping! :)

Leave a Reply