`
dcaoyuan
  • 浏览: 307338 次
社区版块
存档分类
最新评论

Parse JSON to xmerl Compatible XML Tree via A Simple XML State Machine

阅读更多

Updated Aug 16: Fix bugs when json is an array. Add a 'json:root' element always since valid xml should have a root. Remove 'obj' tag that is not necessary.

Updated Aug 15: A more complete json_parser.erl. Thanks to tonyg's beautiful work, some bugs have been fixed.

Updated Aug 5: rewrote json_parser.erl based on tonyg's RFC4627 implementation, fixed some bugs.

In my previous blog: A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo, I wrote a simple state machine to parse icalendar to an xmerl compatible XML tree. This time, I'll use this state machine to parse a JSON expression to an xmerl compatible XML tree; the work is fairly simple:

%%---------------------------------------------------------------------------
%% Copyright (c) 2007 Tony Garnock-Jones <tonyg@kcbbs.gen.nz>
%% Copyright (c) 2007 LShift Ltd. <query@lshift.net>
%% Copyright (c) 2007 LightPole, Inc. 
%%
%% Permission is hereby granted, free of charge, to any person
%% obtaining a copy of this software and associated documentation
%% files (the "Software"), to deal in the Software without
%% restriction, including without limitation the rights to use, copy,
%% modify, merge, publish, distribute, sublicense, and/or sell copies
%% of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be
%% included in all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
%% MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
%% BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
%% ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
%% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
%% SOFTWARE.
%%---------------------------------------------------------------------------
%%
%% JSON parser that reports what it reads as SAX-style events
%% ({startDocument}, {startElement, ...}, {characters, ...}, ...) to a
%% state-machine fun, which builds the resulting tree.
-module(json_parser).

%% Fun of the XML state machine. NOTE(review): this macro is not referenced
%% anywhere else in this listing; parse_to_xml/1 spells the fun out directly.
-define(stateMachine, fun xml_sm:state/2).

%% Namespace URI attached to the synthetic root element.
-define(JsonNSUri,   "http://www.lightpole.net/xmlns/1.0").
%% Namespace declaration attribute placed on the root element.
-define(JsonNSAtrr,  {'xmlns:json', ?JsonNSUri}).
%% Qualified name of the synthetic root element.
-define(JsonNSRoot,  'json:root').
%% Element name used for the items of a top-level JSON array.
-define(JsonNSArray, 'json:array').

%% Parser context threaded through every parse_* function:
%%   machine - fun(Event, State) -> NewState that consumes the events
%%   qname   - element name used for the items of the array being parsed
-record(context, {machine,
                  qname}).

%% Public API.
-export([parse_to_xml/1,
         parse_to_poet/1]).

%% Manual smoke test.
-export([test/0]).

%% Parse JSON text (string or binary), feeding the events to xml_sm:state/2.
%% Returns whatever that machine yields for the final {endDocument} event.
parse_to_xml(Data) ->
    Context = #context{machine = fun xml_sm:state/2},
    parse(Data, Context).
        
%% Parse JSON text (string or binary), feeding the events to poet_sm:state/2.
%% Returns whatever that machine yields for the final {endDocument} event.
parse_to_poet(Data) ->
    Context = #context{machine = fun poet_sm:state/2},
    parse(Data, Context).

%% Run a complete parse: open the document, parse the root value, close the
%% document. Binaries are converted to a character list first.
%% The machine is started with `undefined' as its initial state, and the
%% result of the {endDocument} event is returned to the caller.
parse(Input, Context) when is_binary(Input) ->
    parse(binary_to_list(Input), Context);
parse(Input, #context{machine = Emit} = Context) ->
    Opened = Emit({startDocument}, undefined),
    Filled = parse_root(skip_ws(Input), Opened, Context),
    Emit({endDocument}, Filled).

%% Parse the top-level JSON value, which must be an object or an array.
%% Valid XML needs a single root element, so the content is always wrapped
%% in a 'json:root' element carrying the json namespace declaration.
%% Items of a top-level array are emitted as 'json:array' elements.
%% Input remaining after the root value is ignored.
parse_root([${ | T], State, Context) ->
    wrap_in_root(fun parse_object/3, undefined, T, State, Context);
parse_root([$[ | T], State, Context) ->
    wrap_in_root(fun parse_array/3, ?JsonNSArray, T, State, Context).

%% Emit the root start tag, run ParseBody with qname set to QName, then emit
%% the root end tag and return the resulting machine state.
wrap_in_root(ParseBody, QName, T, State, #context{machine = Emit} = Context) ->
    Opened = Emit({startElement, ?JsonNSUri, root, ?JsonNSRoot, [?JsonNSAtrr]}, State),
    BodyContext = Context#context{qname = QName},
    {_Rest, Filled} = ParseBody(skip_ws(T), Opened, BodyContext),
    Emit({endElement, ?JsonNSUri, root, ?JsonNSRoot}, Filled).

%% Parse the members of a JSON object; the opening `{' has already been
%% consumed by the caller.
%%
%% Each `"name": value' member is reported to the state machine as an
%% element named after the member. An array value is handed to parse_array/3
%% with qname set to the member name, so every array item becomes its own
%% element with that name.
%%
%% Returns {Rest, State}: the input following the closing `}' and the
%% updated machine state. Crashes (function_clause / badmatch) on input that
%% is not a well-formed member list.
parse_object([$}|T], State, _Context) ->
    {T, State};
parse_object([$,|T], State, Context) ->
    parse_object(skip_ws(T), State, Context);
parse_object([$"|T], State, #context{machine=MachineFun}=Context) ->
    %% Read the member name starting right after its opening quote.
    %% Whitespace inside the quotes is part of the name and must not be
    %% skipped.
    {Rest, ObjNameStr} = parse_string(T, []),
    %% NOTE(review): list_to_atom/1 on untrusted input can exhaust the atom
    %% table. Kept because the name is passed to the machine as an atom.
    ObjName = list_to_atom(ObjNameStr),
    Context1 = Context#context{qname = ObjName},
    [$:|T1] = skip_ws(Rest),
    {Rest1, State1} =
        case skip_ws(T1) of
            [$[|T2] ->
                %% the value is an array: create a list of elements, each
                %% named 'ObjName'
                parse_array(skip_ws(T2), State, Context1);
            ValueChars ->
                StateX1 = MachineFun({startElement, "", ObjName, ObjName, []}, State),
                {RestX, StateX2} = parse_value(ValueChars, StateX1, Context1),
                StateX3 = MachineFun({endElement, "", ObjName, ObjName}, StateX2),
                {RestX, StateX3}
        end,
    parse_object(skip_ws(Rest1), State1, Context1).

%% Parse the items of a JSON array; the opening `[' has already been
%% consumed by the caller.
%%
%% Every item is wrapped in an element named by the context's qname.
%% Returns {Rest, State}: the input following the closing `]' and the
%% updated machine state.
%%
%% Exits with `syntax_error' when the input ends before the closing `]' or
%% when an item cannot be parsed. Without these checks parse_value/3 returns
%% without consuming anything and this function would recurse forever on the
%% same input while the machine state keeps growing.
parse_array([], _State, _Context) ->
    exit(syntax_error);
parse_array([$]|T], State, _Context) ->
    {T, State};
parse_array([$,|T], State, Context) ->
    parse_array(skip_ws(T), State, Context);
parse_array(Chars, State, #context{machine=MachineFun, qname=QName}=Context) ->
    State1 = MachineFun({startElement, "", QName, QName, []}, State),
    case parse_value(Chars, State1, Context) of
        {Chars, _NoProgress} ->
            %% Nothing was consumed: Chars does not start with a JSON value.
            exit(syntax_error);
        {Rest, State2} ->
            State3 = MachineFun({endElement, "", QName, QName}, State2),
            parse_array(skip_ws(Rest), State3, Context)
    end.

%% Parse one JSON value and report it to the state machine.
%%
%% Scalars (true, false, null, strings, numbers) are emitted as a single
%% {characters, Text} event; numbers are passed on as their textual form.
%% Objects and arrays are delegated to parse_object/3 and parse_array/3.
%% Empty input is returned unchanged without emitting anything.
%% Anything not recognised by an earlier clause is tried as a number.
%%
%% Returns {Rest, State}: the remaining input and the updated machine state.
parse_value([], State, _Context) ->
    {[], State};
parse_value([$t, $r, $u, $e | Rest], State, #context{machine = Emit}) ->
    emit_characters(Emit, "true", Rest, State);
parse_value([$f, $a, $l, $s, $e | Rest], State, #context{machine = Emit}) ->
    emit_characters(Emit, "false", Rest, State);
parse_value([$n, $u, $l, $l | Rest], State, #context{machine = Emit}) ->
    emit_characters(Emit, "null", Rest, State);
parse_value([$" | Body], State, #context{machine = Emit}) ->
    {Rest, Text} = parse_string(Body, []),
    emit_characters(Emit, Text, Rest, State);
parse_value([${ | Body], State, Context) ->
    parse_object(skip_ws(Body), State, Context);
parse_value([$[ | Body], State, Context) ->
    parse_array(skip_ws(Body), State, Context);
parse_value(Other, State, #context{machine = Emit}) ->
    {Rest, Digits} = parse_number(skip_ws(Other), []),
    emit_characters(Emit, Digits, Rest, State).

%% Send Text to the machine as a characters event and pair the new state
%% with the remaining input.
emit_characters(Emit, Text, Rest, State) ->
    {Rest, Emit({characters, Text}, State)}.



%% Read the body of a JSON string; the opening quote has already been
%% consumed. Acc collects the decoded characters in reverse order.
%% Returns {Rest, String}, where Rest is the input after the closing quote.
%% Backslash escapes are decoded by parse_escaped_char/3.
parse_string([$" | Rest], Acc) ->
    {Rest, lists:reverse(Acc)};
parse_string([$\\, Escaped | Rest], Acc) ->
    parse_escaped_char(Escaped, Rest, Acc);
parse_string([Ch | Rest], Acc) ->
    parse_string(Rest, [Ch | Acc]).

%% Decode the character that followed a backslash inside a string, push the
%% result onto Acc and continue with parse_string/2.
%% Supports \b \t \n \f \r \/ \\ \" and \uXXXX (four hex digits, turned into
%% a single integer code).
parse_escaped_char($b, Tail, Acc) -> parse_string(Tail, [$\b | Acc]);
parse_escaped_char($t, Tail, Acc) -> parse_string(Tail, [$\t | Acc]);
parse_escaped_char($n, Tail, Acc) -> parse_string(Tail, [$\n | Acc]);
parse_escaped_char($f, Tail, Acc) -> parse_string(Tail, [$\f | Acc]);
parse_escaped_char($r, Tail, Acc) -> parse_string(Tail, [$\r | Acc]);
parse_escaped_char(Ch, Tail, Acc) when Ch =:= $/; Ch =:= $\\; Ch =:= $" ->
    %% These escapes stand for the character itself.
    parse_string(Tail, [Ch | Acc]);
parse_escaped_char($u, [D0, D1, D2, D3 | Tail], Acc) ->
    %% Fold the four hex digits, most significant first, into one integer.
    Code = lists:foldl(fun(Digit, Sum) -> (Sum bsl 4) + digit_hex(Digit) end,
                       0,
                       [D0, D1, D2, D3]),
    parse_string(Tail, [Code | Acc]).

%% Numeric value (0..15) of one hexadecimal digit character, upper or lower
%% case. Any other argument fails with function_clause.
digit_hex(C) when is_integer(C), C >= $0, C =< $9 -> C - $0;
digit_hex(C) when is_integer(C), C >= $A, C =< $F -> C - $A + 10;
digit_hex(C) when is_integer(C), C >= $a, C =< $f -> C - $a + 10.

%% Finish a number: Acc holds its characters in reverse order.
%% The number is returned as text, not converted to an integer or float.
%% Returns {Rest, NumberString}.
finish_number(Rest, Acc) ->
    {Rest, lists:reverse(Acc)}.

%% Entry point for number parsing. Exits with `syntax_error' on empty input;
%% otherwise records an optional leading minus sign and hands the remainder
%% to parse_number1/2. Acc collects characters in reverse order.
parse_number([], _Acc) ->
    exit(syntax_error);
parse_number([$- | Unsigned], Acc) ->
    parse_number1(Unsigned, [$- | Acc]);
parse_number(Unsigned, Acc) ->
    parse_number1(Unsigned, Acc).

%% Parse the integer digits of a number, then an optional fraction and an
%% optional exponent. Returns {Rest, NumberString} via finish_number/2.
parse_number1(Input, Acc) ->
    {IntAcc, AfterInt} = parse_int_part(Input, Acc),
    number_tail(AfterInt, IntAcc).

%% Decide what follows the integer digits: end of input, a fraction, or
%% possibly an exponent. The flag passed to parse_exp/3 is `true' when no
%% fraction was seen.
number_tail([], Acc) ->
    finish_number([], Acc);
number_tail([$. | Fraction], Acc) ->
    {FracAcc, AfterFrac} = parse_int_part(Fraction, [$. | Acc]),
    parse_exp(AfterFrac, FracAcc, false);
number_tail(Other, Acc) ->
    parse_exp(Other, Acc, true).


%% Consume the leading run of decimal digits from Input and push them onto
%% Acc, which is kept in reverse order.
%% Returns {NewAcc, Rest}. Note the order: accumulator first, unlike the
%% other parse_* helpers.
parse_int_part(Input, Acc) ->
    {Digits, Rest} = lists:splitwith(fun is_digit/1, Input),
    {lists:reverse(Digits, Acc), Rest}.

%% Look for an exponent marker (`e' or `E'). When present, continue with
%% parse_exp1/3; otherwise the number is complete.
parse_exp([Marker | T], Acc, NeedFrac) when Marker =:= $e; Marker =:= $E ->
    parse_exp1(T, Acc, NeedFrac);
parse_exp(Rest, Acc, _NeedFrac) ->
    finish_number(Rest, Acc).

%% Parse the exponent that follows an `e'/`E' marker.
%% The marker is always written as a lowercase `e'. When NeedFrac is true
%% (the number had no fraction) ".0" is inserted before it, so "1e5" becomes
%% "1.0e5". Acc is in reverse order, hence the reversed prefix below.
parse_exp1(Rest, Acc, NeedFrac) ->
    WithMarker =
        case NeedFrac of
            true -> [$e, $0, $. | Acc];
            _ -> [$e | Acc]
        end,
    {ExpAcc, AfterExp} = parse_signed_int_part(Rest, WithMarker),
    finish_number(AfterExp, ExpAcc).

%% Like parse_int_part/2, but first accepts an optional `+' or `-' sign,
%% which is kept in the accumulator.
parse_signed_int_part([Sign | T], Acc) when Sign =:= $+; Sign =:= $- ->
    parse_int_part(T, [Sign | Acc]);
parse_signed_int_part(Rest, Acc) ->
    parse_int_part(Rest, Acc).

%% True when C is an integer character code in the range $0..$9.
is_digit(C) ->
    is_integer(C) andalso C >= $0 andalso C =< $9.
    

%% Drop leading elements that compare =< 32 (space and the control
%% characters below it); anything else is returned untouched.
skip_ws([C | Rest]) when C =< $\s ->
    skip_ws(Rest);
skip_ws(Other) ->
    Other.



%% Manual smoke test: parses three JSON samples (an object, an array of
%% objects, and a nested document), prints the results, and finally looks up
%% the first business' latitude with an XPath query.
%%
%% Expects both state machines to yield {ok, Result} for {endDocument};
%% a different result crashes with badmatch.
test() ->
    Text1 = "{\"firstname\":\"Caoyuan\", \"iq\":\"150\"}",
    {ok, Xml1} = parse_to_xml(Text1),
    XmlText1 = lists:flatten(xmerl:export_simple([Xml1], xmerl_xml)),
    io:fwrite(user, "Parsed XML: ~n~p~n", [XmlText1]),
    {ok, Poet1} = parse_to_poet(Text1),
    io:fwrite(user, "Parsed POET: ~n~p~n", [Poet1]),

    Text2 = "[{\"firstname\":\"Caoyuan\", \"iq\":\"150\"}, 
              {\"firstname\":\"Haobo\", \"iq\":150}]", 
    {ok, Xml2} = parse_to_xml(Text2),
    XmlText2 = lists:flatten(xmerl:export_simple([Xml2], xmerl_xml)),
    io:fwrite(user, "Parsed: ~n~p~n", [XmlText2]),

    Text = "
{\"businesses\": [{\"address1\": \"650 Mission Street\",
                   \"address2\": \"\",
                   \"avg_rating\": 4.5,
                   \"categories\": [{\"category_filter\": \"localflavor\",
                                     \"name\": \"Local Flavor\",
                                     \"search_url\": \"http://lightpole.net/search\"}],
                   \"city\": \"San Francisco\",
                   \"distance\": 0.085253790020942688,
                   \"id\": \"4kMBvIEWPxWkWKFN__8SxQ\",
                   \"latitude\": 37.787185668945298,
                   \"longitude\": -122.40093994140599},
                  {\"address1\": \"25 Maiden Lane\",
                   \"address2\": \"\",
                   \"avg_rating\": 5.0,
                   \"categories\": [{\"category_filter\": \"localflavor\",
                                     \"name\": \"Local Flavor\",
                                     \"search_url\": \"http://lightpole.net/search\"}],
                   \"city\": \"San Francisco\",
                   \"distance\": 0.23186808824539185,
                   \"id\": \"O1zPF_b7RyEY_NNsizX7Yw\",
                   \"latitude\": 37.788387,
                   \"longitude\": -122.40401}]} ",
    {ok, Xml} = parse_to_xml(Text),
    XmlText = lists:flatten(xmerl:export_simple([Xml], xmerl_xml)),
    io:fwrite(user, "Parsed: ~n~p~n", [XmlText]),
    %% The root element is 'json:root', so the path must use the json prefix.
    Latitude1 = xmerl_xpath:string("/json:root/businesses[1]/latitude/text()", Xml),
    io:format(user, "Latitude1: ~p~n", [Latitude1]).

The result will be something like:

<?xml version="1.0"?>
<json:root xmlns:json="http://www.lightpole.net/xmlns/1.0">
  <businesses>
    <address1>650 Mission Street</address1>
    <address2></address2>
    <avg_rating>4.5</avg_rating>
    <categories>
      <category_filter>localflavor</category_filter>
      <name>Local Flavor</name>
      <search_url>http://lightpole.net/search</search_url>
    </categories>
    <city>San Francisco</city>
    <distance>0.085253790020942688</distance>
    <id>4kMBvIEWPxWkWKFN__8SxQ</id>
    <latitude>37.787185668945298</latitude>
    <longitude>-122.40093994140599</longitude>
  </businesses>
  <businesses>
    <address1>25 Maiden Lane</address1>
    <address2></address2>
    <avg_rating>5.0</avg_rating>
    <categories>
      <category_filter>localflavor</category_filter>
      <name>Local Flavor</name>
      <search_url>http://lightpole.net/search</search_url>
    </categories>
    <city>San Francisco</city>
    <distance>0.23186808824539185</distance>
    <id>O1zPF_b7RyEY_NNsizX7Yw</id>
    <latitude>37.788387</latitude>
    <longitude>-122.40401</longitude>
  </businesses>
</json:root>

Now you can fetch an element with:

> [Latitude1] = xmerl_xpath:string("/json:root/businesses[1]/latitude/text()", Xml),
> Latitude1#xmlText.value.
"37.787185668945298"

Next time, I'll write a simple Erlang Data state machine, which will parse icalendar and json to simple Erlang Lists + Tuples.

The code of xml_sm.erl can be found in my previous blog.

分享到:
评论

相关推荐

    Parse JSON in TSQL

    Is it possible to parse JSON in TSQL? I dont mean to create a JSON string, i mean to parse a json string passed in as a parameter.数据库parseJSON 转表

    xml-to-json.rar_XML to JSON _jquery.xml2json.js_xml json_xml to

    在上述代码中,`$.parseXML()`函数用于将XML字符串解析为DOM对象,然后`$.xml2json()`函数将DOM对象转换为JSON对象。转换后的JSON对象可以直接在JavaScript环境中使用,如遍历、操作或发送到服务器。 值得注意的是...

    Xml2Json and Json2Xml

    XML(Extensible Markup Language)和JSON(JavaScript Object Notation)是两种常见的数据交换格式,广泛应用于Web服务和数据传输。XML是一种结构化语言,用于描述数据,而JSON更简洁,通常用于JavaScript应用程序...

    (C#)json to xml 解析转换源代码

    标题中的"(C#)json to xml 解析转换源代码"指出我们将探讨一个C#实现的JSON到XML的转换工具。这种转换通常涉及两个主要步骤:首先解析JSON字符串,然后将解析得到的数据结构转换成XML表示。 描述中提到的"用C#写的...

    ParseJson.txt

    大多数网上下载的SQL parseJson函数都存在Bug,我也是下载应用到公司系统开发后才发现,无奈系统已经正式使用,硬着头皮改Bug,至少改了两处Bug,修改不易,希望大家体谅一下.

    JSON2.JS JSON.JS JSON_PARSE.JS

    toJSONString method and a parseJSON method to Object.prototype. Use of this file is not recommended. json_parse.js: This file contains an alternative JSON parse function that uses recursive descent ...

    Jboss启动报Failed to parse WEB-INFweb.xml; - nested throwable错误

    Jboss启动报Failed to parse WEB-INF/web.xml; - nested throwable错误解决方案 在Jboss应用服务器中,启动报错Failed to parse WEB-INF/web.xml; - nested throwable是一种常见的错误,本文将对此错误进行深入分析...

    json报文与XML互转

    JSON(JavaScript Object Notation)和XML(eXtensible Markup Language)是两种广泛使用的数据交换格式,它们在web服务和应用程序之间传输数据时起到关键作用。本文将深入讲解如何在Java环境中将Java对象与JSON报文...

    jquery-xmlToJson.js

    总结来说,"jquery-xmlToJson.js"是一个用于XML到JSON转换的jQuery插件,它能够帮助开发者轻松地将XML数据转化为更适应JavaScript环境的JSON格式,提高数据处理的效率,并且兼容多种jQuery版本。在进行Web开发时,...

    C#写的Json与Xml互转支持.net framework2.0以上组件Newtonsoft.Json.dll

    在.NET开发环境中,数据交换和序列化经常是关键任务,其中JSON和XML是最常见的两种格式。本文将深入探讨如何在C#中使用Newtonsoft.Json.dll库进行JSON与XML之间的转换,特别关注对.NET Framework 2.0及以上版本的...

    C#json与Xml相互转换例子

    在.NET编程环境中,C#是一种常用的编程语言,而JSON(JavaScript Object Notation)和XML(eXtensible Markup Language)是两种广泛使用的数据交换格式。本文将深入探讨如何在C#中实现JSON与XML的相互转换,以满足...

    Table2JSONTree_C#根据TABLE产生JSON树_jsontree2table_

    在描述中提到的"Table2JSONTree_C#根据TABLE产生JSON树_jsontree2table_"是一个关于将数据库中的表格数据转换为JavaScript Object Notation (JSON)树形结构的过程。这种转换在前端开发中非常常见,特别是在需要在...

    纯C语言XML解析xmlparse.c&xmlparse;.h

    《纯C语言XML解析——xmlparse.c&xmlparse.h》 XML(eXtensible Markup Language)是一种广泛应用的标记语言,常用于数据交换、配置文件和文档存储等场景。在IT行业中,尤其是在需要跨平台或者对性能有较高要求的...

    perl的xml::simple解析文件

    Perl中的XML::Simple模块是Perl社区广泛使用的XML解析器,尤其适合处理小型或结构简单的XML文档。这个模块的名称虽然包含“Simple”,但它实际上提供了一种简洁的接口,用于将XML数据转换为Perl数据结构,反之亦然。...

    XML与JSON文件代码例子

    在Python中,有`xml.etree.ElementTree`库处理XML,`json`模块处理JSON。 总结来说,XML和JSON是数据表示的两种重要方式,它们各有特点,适应不同的应用场景。TestXML文件中的内容可能是展示如何在代码中创建、解析...

    json与xml使用

    JSON(JavaScript Object Notation)和XML(eXtensible Markup Language)是两种广泛用于数据交换的格式,尤其在Web服务和移动应用开发中。本文将深入探讨这两种格式在安卓平台上的使用及其优缺点。 首先,让我们...

    IOS XML类型转JSON类型

    在iOS开发中,XML(Extensible Markup Language)和JSON(JavaScript Object Notation)是两种常见的数据交换格式。XML因其结构严谨、易于解析而被广泛应用于数据存储和传输,而JSON则以其简洁、易于读写的特点在...

    Sql Server Json解析

    sql server 2014 JSON解析到表函数 CREATE FUNCTION [dbo].[parseJSON]( @JSON NVARCHAR(MAX)) RETURNS @hierarchy TABLE ( element_id INT IDENTITY(1, 1) NOT NULL, ...

    Android中xml转json

    在Android开发中,XML(可扩展标记语言)和JSON(JavaScript Object Notation)都是常见的数据交换格式。XML因其结构化特性适用于复杂的文档存储,而JSON则以其轻量级、易于阅读和编写的特点广泛用于Web服务的数据...

    json2.js json.parse json未定义 的解决方案!

    json2.js json.parse json未定义 的解决方案!

Global site tag (gtag.js) - Google Analytics