MongoDB  2.7.0
json.h
1 
29 #pragma once
30 
31 #include <string>
32 
33 #include "mongo/bson/bsonobj.h"
34 #include "mongo/base/status.h"
35 #include "mongo/client/export_macros.h"
36 
37 namespace mongo {
38 
/* Create a BSONObj from a JSON string (see http://www.json.org and http://www.ietf.org/rfc/rfc4627.txt). */
52  MONGO_CLIENT_API BSONObj fromjson(const std::string& str);
53 
/* Overload of fromjson taking a C string.
 * NOTE(review): len is an optional out-parameter (defaults to NULL); presumably it
 * receives the number of input characters consumed -- confirm against json.cpp. */
55  MONGO_CLIENT_API BSONObj fromjson(const char* str, int* len=NULL);
56 
62  class JParse {
    /*
     * Parser class. The grammar productions documented in the comments below
     * are handled by member functions that return a Status. The read position
     * is the _input cursor; offset() returns _input - _buf.
     */
63  public:
    /* Constructs a parser over a C string. Per the notes on _buf at the end
     * of this class, the buffer is expected to be null terminated. */
64  explicit JParse(const char*);
65 
66  /*
67  * Notation: All-uppercase symbols denote non-terminals; all other
68  * symbols are literals.
69  */
70 
71  /*
72  * VALUE :
73  * STRING
74  * | NUMBER
75  * | NUMBERINT
76  * | NUMBERLONG
77  * | OBJECT
78  * | ARRAY
79  *
80  * | true
81  * | false
82  * | null
83  * | undefined
84  *
85  * | NaN
86  * | Infinity
87  * | -Infinity
88  *
89  * | DATE
90  * | TIMESTAMP
91  * | REGEX
92  * | OBJECTID
93  * | DBREF
94  *
95  * | new CONSTRUCTOR
96  */
97  private:
98  Status value(const StringData& fieldName, BSONObjBuilder&);
99 
100  /*
101  * OBJECT :
102  * {}
103  * | { MEMBERS }
104  * | SPECIALOBJECT
105  *
106  * MEMBERS :
107  * PAIR
108  * | PAIR , MEMBERS
109  *
110  * PAIR :
111  * FIELD : VALUE
112  *
113  * SPECIALOBJECT :
114  * OIDOBJECT
115  * | BINARYOBJECT
116  * | DATEOBJECT
117  * | TIMESTAMPOBJECT
118  * | REGEXOBJECT
119  * | REFOBJECT
120  * | UNDEFINEDOBJECT
121  * | NUMBERLONGOBJECT
122  *
123  */
124  public:
    /* The only grammar rule declared public in this class.
     * NOTE(review): subObj defaults to true; presumably false makes the
     * members get appended directly to the supplied builder instead of to a
     * nested object named fieldName -- confirm against json.cpp. */
125  Status object(const StringData& fieldName, BSONObjBuilder&, bool subObj=true);
126 
127  private:
128  /* The following functions are called with the '{' and the first
129  * field already parsed since they are both implied given the
130  * context. */
131  /*
132  * OIDOBJECT :
133  * { FIELD("$oid") : <24 character hex string> }
134  */
135  Status objectIdObject(const StringData& fieldName, BSONObjBuilder&);
136 
137  /*
138  * BINARYOBJECT :
139  * { FIELD("$binary") : <base64 representation of a binary string>,
140  * FIELD("$type") : <hexadecimal representation of a single byte
141  * indicating the data type> }
142  */
143  Status binaryObject(const StringData& fieldName, BSONObjBuilder&);
144 
145  /*
146  * DATEOBJECT :
147  * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> }
148  */
149  Status dateObject(const StringData& fieldName, BSONObjBuilder&);
150 
151  /*
152  * TIMESTAMPOBJECT :
153  * { FIELD("$timestamp") : {
154  * FIELD("t") : <32 bit unsigned integer for seconds since epoch>,
155  * FIELD("i") : <32 bit unsigned integer for the increment> } }
156  */
157  Status timestampObject(const StringData& fieldName, BSONObjBuilder&);
158 
159  /*
160  * NOTE: the rules for the body of the regex are different here,
161  * since it is quoted instead of surrounded by slashes.
162  * REGEXOBJECT :
163  * { FIELD("$regex") : <string representing body of regex> }
164  * | { FIELD("$regex") : <string representing body of regex>,
165  * FIELD("$options") : <string representing regex options> }
166  */
167  Status regexObject(const StringData& fieldName, BSONObjBuilder&);
168 
169  /*
170  * REFOBJECT :
171  * { FIELD("$ref") : <string representing collection name>,
172  * FIELD("$id") : <24 character hex string> }
173  * | { FIELD("$ref") : STRING , FIELD("$id") : OBJECTID }
174  * | { FIELD("$ref") : STRING , FIELD("$id") : OIDOBJECT }
175  */
176  Status dbRefObject(const StringData& fieldName, BSONObjBuilder&);
177 
178  /*
179  * UNDEFINEDOBJECT :
180  * { FIELD("$undefined") : true }
181  */
182  Status undefinedObject(const StringData& fieldName, BSONObjBuilder&);
183 
184  /*
185  * NUMBERLONGOBJECT :
186  * { FIELD("$numberLong") : "<number>" }
187  */
188  Status numberLongObject(const StringData& fieldName, BSONObjBuilder&);
189 
190  /*
191  * ARRAY :
192  * []
193  * | [ ELEMENTS ]
194  *
195  * ELEMENTS :
196  * VALUE
197  * | VALUE , ELEMENTS
198  */
199  Status array(const StringData& fieldName, BSONObjBuilder&);
200 
201  /*
202  * NOTE: Currently only Date can be preceded by the "new" keyword
203  * CONSTRUCTOR :
204  * DATE
205  */
206  Status constructor(const StringData& fieldName, BSONObjBuilder&);
207 
208  /* The following functions only parse the body of the constructor
209  * between the parentheses, not including the constructor name */
210  /*
211  * DATE :
212  * Date( <64 bit signed integer for milliseconds since epoch> )
213  */
214  Status date(const StringData& fieldName, BSONObjBuilder&);
215 
216  /*
217  * TIMESTAMP :
218  * Timestamp( <32 bit unsigned integer for seconds since epoch>,
219  * <32 bit unsigned integer for the increment> )
220  */
221  Status timestamp(const StringData& fieldName, BSONObjBuilder&);
222 
223  /*
224  * OBJECTID :
225  * ObjectId( <24 character hex string> )
226  */
227  Status objectId(const StringData& fieldName, BSONObjBuilder&);
228 
229  /*
230  * NUMBERLONG :
231  * NumberLong( <number> )
232  */
233  Status numberLong(const StringData& fieldName, BSONObjBuilder&);
234 
235  /*
236  * NUMBERINT :
237  * NumberInt( <number> )
238  */
239  Status numberInt(const StringData& fieldName, BSONObjBuilder&);
240 
241  /*
242  * DBREF :
243  * Dbref( <namespace string> , <24 character hex string> )
244  */
245  Status dbRef(const StringData& fieldName, BSONObjBuilder&);
246 
247  /*
248  * REGEX :
249  * / REGEXCHARS / REGEXOPTIONS
250  *
251  * REGEXCHARS :
252  * REGEXCHAR
253  * | REGEXCHAR REGEXCHARS
254  *
255  * REGEXCHAR :
256  * any-Unicode-character-except-/-or-\-or-CONTROLCHAR
257  * | \"
258  * | \'
259  * | \\
260  * | \/
261  * | \b
262  * | \f
263  * | \n
264  * | \r
265  * | \t
266  * | \v
267  * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
268  * | \any-Unicode-character-except-x-or-[0-7]
269  *
270  * REGEXOPTIONS :
271  * REGEXOPTION
272  * | REGEXOPTION REGEXOPTIONS
273  *
274  * REGEXOPTION :
275  * g | i | m | s
276  */
277  Status regex(const StringData& fieldName, BSONObjBuilder&);
    /* NOTE(review): regexPat and regexOpt presumably parse REGEXCHARS and
     * REGEXOPTIONS into *result, and regexOptCheck presumably validates an
     * options string -- confirm against json.cpp. */
278  Status regexPat(std::string* result);
279  Status regexOpt(std::string* result);
280  Status regexOptCheck(const StringData& opt);
281 
282  /*
283  * NUMBER :
284  *
285  * NOTE: Number parsing is based on standard library functions, not
286  * necessarily on the JSON numeric grammar.
287  *
288  * Number as value - strtoll and strtod
289  * Date - strtoll
290  * Timestamp - strtoul for both timestamp and increment and '-'
291  * before a number explicitly disallowed
292  */
293  Status number(const StringData& fieldName, BSONObjBuilder&);
294 
295  /*
296  * FIELD :
297  * STRING
298  * | [a-zA-Z$_] FIELDCHARS
299  *
300  * FIELDCHARS :
301  * [a-zA-Z0-9$_]
302  * | [a-zA-Z0-9$_] FIELDCHARS
303  */
304  Status field(std::string* result);
305 
306  /*
307  * STRING :
308  * " "
309  * | ' '
310  * | " CHARS "
311  * | ' CHARS '
312  */
313  Status quotedString(std::string* result);
314 
315  /*
316  * CHARS :
317  * CHAR
318  * | CHAR CHARS
319  *
320  * Note: " or ' may be allowed depending on whether the string is
321  * double or single quoted
322  *
323  * CHAR :
324  * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR
325  * | \"
326  * | \'
327  * | \\
328  * | \/
329  * | \b
330  * | \f
331  * | \n
332  * | \r
333  * | \t
334  * | \v
335  * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
336  * | \any-Unicode-character-except-x-or-[0-9]
337  *
338  * HEXDIGIT : [0..9a..fA..F]
339  *
340  * per http://www.ietf.org/rfc/rfc4627.txt, control characters are
341  * (U+0000 through U+001F). U+007F is not mentioned as a control
342  * character.
343  * CONTROLCHAR : [0x00..0x1F]
344  *
345  * If there is not an error, result will contain a null terminated
346  * string, but there is no guarantee that it will not contain other
347  * null characters.
348  */
    /* NOTE(review): terminalSet presumably lists the characters that end the
     * run of CHARS, and allowedSet (optional, defaults to NULL) presumably
     * lists characters accepted without escaping -- confirm against json.cpp. */
349  Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL);
350 
    /* NOTE(review): presumably turns the two bytes of a backslash-u escape
     * (four hex digits) into the corresponding UTF-8 byte sequence -- confirm. */
357  std::string encodeUTF8(unsigned char first, unsigned char second) const;
358 
    /* NOTE(review): peekToken and readToken presumably both test whether the
     * input at the cursor matches token, with only readToken consuming it;
     * readTokenImpl looks like the shared implementation, selected by its
     * advance flag (default true) -- confirm against json.cpp. */
365  inline bool peekToken(const char* token);
366 
373  inline bool readToken(const char* token);
374 
381  bool readTokenImpl(const char* token, bool advance=true);
382 
    /* NOTE(review): presumably true when the next FIELD in the input equals
     * the expected name -- confirm whether the cursor advances on failure. */
387  bool readField(const StringData& field);
388 
    /* NOTE(review): presumably true when matchChar occurs in matchSet -- confirm. */
393  bool match(char matchChar, const char* matchSet) const;
394 
    /* NOTE(review): judging by the names, isHexString and isBase64String check
     * that every character of the argument is a hex digit or a base64
     * character respectively -- confirm. */
398  bool isHexString(const StringData&) const;
399 
404  bool isBase64String(const StringData&) const;
405 
    /* NOTE(review): presumably wraps msg in a non-OK Status for the caller to
     * return -- confirm what position information is attached. */
410  Status parseError(const StringData& msg);
411  public:
    /* Distance of the cursor from the start of the buffer (_input - _buf).
     * The pointer difference is implicitly narrowed to int.
     * NOTE(review): could be declared const, since it modifies nothing. */
412  inline int offset() { return (_input - _buf); }
413 
414  private:
415  /*
416  * _buf - start of our input buffer
417  * _input - cursor we advance in our input buffer
418  * _input_end - sentinel for the end of our input buffer
419  *
420  * _buf is the null terminated buffer containing the JSON string we
421  * are parsing. _input_end points to the null byte at the end of
422  * the buffer. strtoll, strtol, and strtod will access the null
423  * byte at the end of the buffer because they are assuming a c-style
424  * string.
425  */
426  const char* const _buf;
427  const char* _input;
428  const char* const _input_end;
429  };
430 
431 } // namespace mongo
Utility for creating a BSONObj.
Definition: bsonobjbuilder.h:52
Parser class.
Definition: json.h:62
MONGO_CLIENT_API BSONObj fromjson(const std::string &str)
Create a BSONObj from a JSON string (see http://www.json.org and http://www.ietf.org/rfc/rfc4627.txt).