JsonTokenizer.php 9.01 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
<?php

/**
 * Streaming JSON tokenizer.
 *
 * Reads a JSON document from a stream and hands it out one token at a time
 * through next(). Structural validity is tracked with a context stack (are we
 * inside an object or an array) and a bit mask of the token types that may
 * legally come next.
 *
 * String values and object keys are returned as their raw bytes, exactly as
 * they appear between the quotes in the input (escape sequences are NOT
 * decoded). Strings whose raw length reaches the configured threshold are
 * returned as a rewound php://temp stream resource instead of a PHP string.
 *
 * The source stream must be seekable: string reading consumes the stream in
 * chunks and seeks back over whatever was read past the closing quote.
 */
class JsonTokenizer
{

    const CONTEXT_OBJECT = 1;
    const CONTEXT_ARRAY  = 2;

    // Token types are single bits so that they can be combined into the
    // EXPECTED_* masks below.
    const TOKEN_OBJECT_START   = 1;
    const TOKEN_OBJECT_END     = 2;
    const TOKEN_ARRAY_START    = 4;
    const TOKEN_ARRAY_END      = 8;
    const TOKEN_SCALAR         = 16;
    const TOKEN_KEY            = 32;
    const TOKEN_ITEM_SEPARATOR = 64;

    const EXPECTED_ANY         = 127;
    const EXPECTED_ARRAY_ITEM  = 29;  // Object Start, Array Start, Scalar, Array End
    const EXPECTED_OBJECT_ITEM = 23;  // Object Start, Array Start, Scalar, Object End
    const EXPECTED_SEPARATOR   = 64;
    const EXPECTED_KEY         = 32;
    const EXPECTED_ARRAY_END   = 8;
    const EXPECTED_OBJECT_END  = 2;

    /** Upper bound, in bytes, for a single read while scanning a string. */
    const CHUNK_SIZE = 8192;

    /** @var resource Source stream the JSON text is read from */
    protected $stream;

    /** @var int Raw string length from which strings are returned as a temp stream */
    protected $threshold;

    /** @var array Stack of CONTEXT_* values, innermost structure last */
    protected $context  = array();

    /** @var int Bit mask of TOKEN_* types that are allowed to come next */
    protected $expected;

    /** @var array Last token produced by fetch(): keys "key", "token", "content" */
    protected $token = array();

    /** @var array Stack of characters pushed back by readScalar() */
    protected $buffered = array();

    /**
     * @param  resource                  $stream    Seekable stream containing the JSON text
     * @param  int                       $threshold Raw string length (bytes) from which a string
     *                                              is returned as a php://temp stream resource
     * @throws \InvalidArgumentException When $stream is not a stream resource
     */
    public function __construct($stream, $threshold = PHP_INT_MAX)
    {
        if (!is_resource($stream) || get_resource_type($stream) != 'stream') {
            throw new \InvalidArgumentException("Argument is not a stream");
        }

        $this->stream = $stream;
        $this->threshold = $threshold;
        $this->expected = self::EXPECTED_ANY;
    }

    /**
     * Reads the next meaningful token, skipping item separators.
     *
     * @return array|false Array with keys "key" (object key or null), "token"
     *                     (one of the TOKEN_* constants) and "content" (scalar
     *                     value or null); false once the input is exhausted
     * @throws \Exception  When the token is not allowed at this position or the
     *                     input is malformed
     */
    public function next()
    {
        while (true) {
            $this->token = $this->fetch();
            $type = $this->token['token'];

            // A null token type means the end of the input was reached.
            if (!$type) {
                return false;
            }

            if (!($type & $this->expected)) {
                throw new \Exception(sprintf("Read unexpected token %s/%s", $type, $this->expected));
            }

            switch ($type) {
                case self::TOKEN_ARRAY_START:
                    $this->context[] = self::CONTEXT_ARRAY;
                    $this->expected  = self::EXPECTED_ARRAY_ITEM;
                    break;
                case self::TOKEN_OBJECT_START:
                    $this->context[] = self::CONTEXT_OBJECT;
                    $this->expected  = self::EXPECTED_OBJECT_ITEM;
                    break;
                case self::TOKEN_OBJECT_END:
                case self::TOKEN_ARRAY_END:
                    array_pop($this->context);
                // no break: a closed structure is a completed value of its parent
                case self::TOKEN_SCALAR:
                    if ($this->context()) {
                        // Inside a structure a value is followed by a
                        // separator or by the end of that structure.
                        $this->expected  = self::EXPECTED_SEPARATOR;
                        $this->expected |= $this->context() == self::CONTEXT_ARRAY ?
                            self::EXPECTED_ARRAY_END : self::EXPECTED_OBJECT_END;
                    } else {
                        // The top-level value is complete; nothing may follow.
                        $this->expected = 0;
                    }
                    break;
                case self::TOKEN_ITEM_SEPARATOR:
                    $this->expected = $this->context() == self::CONTEXT_ARRAY ?
                        self::EXPECTED_ARRAY_ITEM : self::EXPECTED_OBJECT_ITEM;

                    // Separators are not reported to the caller; read on.
                    continue 2;
            }

            return $this->token;
        }
    }

    /**
     * Reads one raw token from the stream. Inside an object the key and the
     * key-value separator are consumed first and the key is attached to the
     * value token.
     *
     * @return array      Array with keys "key", "token" and "content"
     * @throws \Exception When the input is malformed
     */
    protected function fetch()
    {
        $key = null;

        if ($this->context() == self::CONTEXT_OBJECT) {
            list($token, $key) = $this->readKey();
            if ($token != self::TOKEN_KEY) {
                // Closing bracket, separator or end of input instead of a key.
                return array(
                    'key'     => null,
                    'token'   => $token,
                    'content' => null
                );
            }
        }

        list($token, $content) = $this->readValue();

        return array(
            'key'     => $key,
            'token'   => $token,
            'content' => $content
        );
    }

    /**
     * Reads an object key together with the ":" that must follow it.
     *
     * @return array      Pair of token type and key (key is null unless the
     *                    token type is TOKEN_KEY)
     * @throws \Exception When the key is not followed by ":"
     */
    protected function readKey()
    {
        list($token, $key) = $this->readKeyToken();

        if ($token == self::TOKEN_KEY) {
            $char = $this->findSymbol();
            if ($char != ":") {
                throw new \Exception(sprintf("Expecting key-value separator, got \"%s\"", $char));
            }
        }

        return array($token, $key);
    }

    /**
     * Reads the token found at a position where an object key may appear.
     *
     * @return array      Pair of token type and key; array(null, null) at the
     *                    end of the input
     * @throws \Exception When a character that cannot start a key is found
     */
    protected function readKeyToken()
    {
        $char = $this->findSymbol();

        switch ($char) {
            case "}":
                return array(self::TOKEN_OBJECT_END,     null);
            case "]":
                return array(self::TOKEN_ARRAY_END,      null);
            case ",":
                return array(self::TOKEN_ITEM_SEPARATOR, null);
            case "\"":
                return array(self::TOKEN_KEY, $this->readString($char));
        }

        // Anything else that is not the end of the input is a syntax error;
        // reporting it as "no token" would be indistinguishable from EOF.
        if ($char !== "" && $char !== false) {
            throw new \RuntimeException(sprintf("Expecting object key, got \"%s\"", $char));
        }

        return array(null, null);
    }

    /**
     * Reads the token found at a position where a value may appear.
     *
     * @return array      Pair of token type and content; array(null, null) at
     *                    the end of the input
     * @throws \Exception When a string or scalar is malformed
     */
    protected function readValue()
    {
        $char = $this->findSymbol();

        if ($char === "" || $char === false) {
            return array(null, null);
        }

        switch ($char) {
            case "{":
                return array(self::TOKEN_OBJECT_START,   null);
            case "}":
                return array(self::TOKEN_OBJECT_END,     null);
            case "[":
                return array(self::TOKEN_ARRAY_START,    null);
            case "]":
                return array(self::TOKEN_ARRAY_END,      null);
            case ",":
                return array(self::TOKEN_ITEM_SEPARATOR, null);
            case "\"":
                return array(self::TOKEN_SCALAR, $this->readString($char));
            default:
                return array(self::TOKEN_SCALAR, $this->readScalar($char));
        }
    }

    /**
     * Reads the remainder of a double-quoted string whose opening quote has
     * already been consumed, leaving the stream positioned right after the
     * closing quote.
     *
     * The content is returned raw (escape sequences are not decoded).
     *
     * @param  string          $char The opening quote that was consumed (the
     *                               terminator searched for is always ")
     * @throws \Exception      When the string is not terminated or the stream
     *                         cannot be repositioned
     * @return string|resource The content as a string while its raw length is
     *                         below the threshold, otherwise a rewound
     *                         php://temp stream holding it
     */
    protected function readString($char)
    {
        // Never ask fread() for more than CHUNK_SIZE bytes: the threshold
        // defaults to PHP_INT_MAX, which is not a usable read length.
        $chunkSize = max(1, min((int) $this->threshold, self::CHUNK_SIZE));

        $content  = "";
        $overflow = null;   // php://temp stream, created once the threshold is reached
        $size     = 0;
        $escaped  = false;  // true when the data read so far ends in an unpaired backslash

        while (true) {
            $chunk = fread($this->stream, $chunkSize);

            // Unterminated string (input ended while waiting for the quote)
            if ($chunk === false || $chunk === "") {
                if ($overflow !== null) {
                    fclose($overflow);
                }
                throw new \RuntimeException("String not terminated correctly " . ftell($this->stream));
            }

            $length = strlen($chunk);
            $end    = $this->findStringEnd($chunk, $escaped);
            $piece  = $end === false ? $chunk : (string) substr($chunk, 0, $end);
            $size  += strlen($piece);

            if ($overflow === null && $size >= $this->threshold) {
                // Too large to keep in memory as a string: spill to a temp stream.
                $overflow = fopen('php://temp', 'w+');
                fwrite($overflow, $content);
                $content = "";
            }

            if ($overflow === null) {
                $content .= $piece;
            } else {
                fwrite($overflow, $piece);
            }

            if ($end !== false) {
                // Give back everything that was read past the closing quote.
                $unread = $length - $end - 1;
                if ($unread > 0 && fseek($this->stream, -$unread, SEEK_CUR) !== 0) {
                    if ($overflow !== null) {
                        fclose($overflow);
                    }
                    throw new \RuntimeException("Unable to reposition the stream, it must be seekable");
                }

                if ($overflow === null) {
                    return $content;
                }

                rewind($overflow);

                return $overflow;
            }

            $escaped = $this->endsEscaped($chunk, $escaped);
        }
    }

    /**
     * Finds the offset of the first unescaped double quote in a chunk.
     *
     * A quote is escaped when it is preceded by an odd number of consecutive
     * backslashes; the run may have started in the previous chunk.
     *
     * @param  string    $chunk   Data to search
     * @param  bool      $escaped Whether the data before $chunk ended in an
     *                            unpaired backslash
     * @return int|false Offset of the terminating quote, false if there is none
     */
    private function findStringEnd($chunk, $escaped)
    {
        $offset = 0;

        while (($position = strpos($chunk, "\"", $offset)) !== false) {
            $slashes = 0;
            $index   = $position - 1;

            while ($index >= 0 && $chunk[$index] === "\\") {
                $slashes++;
                $index--;
            }

            // The run of backslashes reaches the start of the chunk, so the
            // pending backslash of the previous chunk belongs to it as well.
            if ($index < 0 && $escaped) {
                $slashes++;
            }

            if ($slashes % 2 === 0) {
                return $position;
            }

            $offset = $position + 1;
        }

        return false;
    }

    /**
     * Tells whether a chunk ends in an unpaired backslash, i.e. whether the
     * first character of the following chunk is escaped.
     *
     * @param  string $chunk   Data that was just read
     * @param  bool   $escaped Whether the data before $chunk ended in an
     *                         unpaired backslash
     * @return bool
     */
    private function endsEscaped($chunk, $escaped)
    {
        $length  = strlen($chunk);
        $slashes = $length - strlen(rtrim($chunk, "\\"));

        // A chunk made only of backslashes continues the previous run.
        if ($slashes === $length && $escaped) {
            $slashes++;
        }

        return $slashes % 2 === 1;
    }

    /**
     * Reads an unquoted scalar (true, false, null or a number) whose first
     * character has already been consumed.
     *
     * @param  string               $char First character of the scalar
     * @return bool|null|float      Numbers are returned as floats
     * @throws \Exception           When the text is not a valid JSON literal
     */
    protected function readScalar($char)
    {
        $buffer = $char;

        while (true) {
            $char = $this->readSymbol();
            if ($char === "" || $char === false || strpos(",}] \t\n\r", $char) !== false) {
                // Structural characters end the scalar but are tokens of
                // their own, so hand them back for the next read.
                if ($char && strpos(",}]", $char) !== false) {
                    $this->buffered[] = $char;
                }
                break;
            }
            $buffer .= $char;
        }

        switch ($buffer) {
            case "true":
                return true;
            case "false":
                return false;
            case "null":
                return null;
        }

        // JSON number grammar: the integer part is mandatory, so inputs such
        // as "-", ".5" or "e5" are rejected.
        if (!preg_match('/^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/', $buffer)) {
            throw new \Exception(sprintf("Scalar value \"%s\" is invalid", $buffer));
        }

        return floatval($buffer);
    }

    /**
     * Skips whitespace and returns the next significant character.
     *
     * @return string|false The character; "" or false at the end of the input
     */
    protected function findSymbol()
    {
        do {
            $char = $this->readSymbol();
        } while ($char !== "" && $char !== false && strpos(" \n\r\t", $char) !== false);

        return $char;
    }

    /**
     * Returns the next character, taking pushed-back characters first.
     *
     * @return string|false The character; "" or false at the end of the input
     */
    protected function readSymbol()
    {
        if ($this->buffered) {
            return array_pop($this->buffered);
        }

        return fread($this->stream, 1);
    }

    /**
     * Returns the innermost structure currently being read.
     *
     * @return int|false CONTEXT_OBJECT or CONTEXT_ARRAY, false at the top level
     */
    public function context()
    {
        return end($this->context);
    }
}