comparison perl-5.22.2/pp_pack.c @ 8045:a16537d2fe07

<xfix> tar xf perl-5.22.2.tar.gz # Ah, whatever, I\'m doing it anyway
author HackBot
date Sat, 14 May 2016 14:54:38 +0000
parents
children
comparison
equal deleted inserted replaced
8044:711c038a7dce 8045:a16537d2fe07
1 /* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11 /*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19 */
20
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
31 #include "EXTERN.h"
32 #define PERL_IN_PP_PACK_C
33 #include "perl.h"
34
35 /* Types used by pack/unpack */
36 typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40 } howlen_t;
41
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55 } tempsym_t;
56
57 #define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
73 typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76 } NV_bytes;
77
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79 typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82 } ld_bytes;
83 #endif
84
85 #ifndef CHAR_BIT
86 # define CHAR_BIT 8
87 #endif
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
89 #define UTF8_EXPAND 2
90
91 /*
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98 /*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
104 */
105 /*
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
108 */
109 #define SIZE16 2
110 #define SIZE32 4
111
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113 --jhi Feb 1999 */
114
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
119 # else
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123 # else
124 ++++ bad cray byte order
125 # endif
126 # endif
127 #else
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
130 #endif
131
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
136
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141 #else
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
147 */
148 #endif
149
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
152 STMT_START { \
153 if (UNLIKELY(utf8)) { \
154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
156 } else { \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
159 else \
160 Copy(s, (char *) (buf), len, char); \
161 s += len; \
162 } \
163 } STMT_END
164
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
167
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
170
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
173
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
176
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
179
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
188
189 STATIC SV *
190 S_mul128(pTHX_ SV *sv, U8 m)
191 {
192 STRLEN len;
193 char *s = SvPV(sv, len);
194 char *t;
195
196 PERL_ARGS_ASSERT_MUL128;
197
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
200
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
203 sv = tmpNew;
204 s = SvPV(sv, len);
205 }
206 t = s + len - 1;
207 while (!*t) /* trailing '\0'? */
208 t--;
209 while (t > s) {
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
212 m = (char)(i / 10);
213 }
214 return (sv);
215 }
216
217 /* Explosives and implosives. */
218
219 #define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
221
222 /* type modifiers */
223 #define TYPE_IS_SHRIEKING 0x100
224 #define TYPE_IS_BIG_ENDIAN 0x200
225 #define TYPE_IS_LITTLE_ENDIAN 0x400
226 #define TYPE_IS_PACK 0x800
227 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
228 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
229 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
230
231 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
233
234 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
235
236 #define PACK_SIZE_CANNOT_CSUM 0x80
237 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
238 #define PACK_SIZE_MASK 0x3F
239
240 #include "packsizetables.c"
241
242 static void
243 S_reverse_copy(const char *src, char *dest, STRLEN len)
244 {
245 dest += len;
246 while (len--)
247 *--dest = *src++;
248 }
249
250 STATIC U8
251 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
252 {
253 STRLEN retlen;
254 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
256 /* We try to process malformed UTF-8 as much as possible (preferably with
257 warnings), but these two mean we make no progress in the string and
258 might enter an infinite loop */
259 if (retlen == (STRLEN) -1 || retlen == 0)
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
262 if (val >= 0x100) {
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
266 val &= 0xff;
267 }
268 *s += retlen;
269 return (U8)val;
270 }
271
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
274 *(U8 *)(s)++)
275
276 STATIC bool
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
278 {
279 UV val;
280 STRLEN retlen;
281 const char *from = *s;
282 int bad = 0;
283 const U32 flags = ckWARN(WARN_UTF8) ?
284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285 const bool needs_swap = NEEDS_SWAP(datumtype);
286
287 if (UNLIKELY(needs_swap))
288 buf += buf_len;
289
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
295 bad |= 1;
296 } else from += retlen;
297 if (val >= 0x100) {
298 bad |= 2;
299 val &= 0xff;
300 }
301 if (UNLIKELY(needs_swap))
302 *(U8 *)--buf = (U8)val;
303 else
304 *(U8 *)buf++ = (U8)val;
305 }
306 /* We have enough characters for the buffer. Did we have problems ? */
307 if (bad) {
308 if (bad & 1) {
309 /* Rewalk the string fragment while warning */
310 const char *ptr;
311 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 }
316 if (from > end) from = end;
317 }
318 if ((bad & 2))
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
324 }
325 *s = from;
326 return TRUE;
327 }
328
329 STATIC char *
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
332
333 if (UNLIKELY(needs_swap)) {
334 const U8 *p = start + len;
335 while (p-- > start) {
336 append_utf8_from_native_byte(*p, (U8 **) & dest);
337 }
338 } else {
339 const U8 * const end = start + len;
340 while (start < end) {
341 append_utf8_from_native_byte(*start, (U8 **) & dest);
342 start++;
343 }
344 }
345 return dest;
346 }
347
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
349 STMT_START { \
350 if (UNLIKELY(utf8)) \
351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
352 else { \
353 if (UNLIKELY(needs_swap)) \
354 S_reverse_copy((char *)(buf), cur, len); \
355 else \
356 Copy(buf, cur, len, char); \
357 (cur) += (len); \
358 } \
359 } STMT_END
360
361 #define GROWING(utf8, cat, start, cur, in_len) \
362 STMT_START { \
363 STRLEN glen = (in_len); \
364 if (utf8) glen *= UTF8_EXPAND; \
365 if ((cur) + glen >= (start) + SvLEN(cat)) { \
366 (start) = sv_exp_grow(cat, glen); \
367 (cur) = (start) + SvCUR(cat); \
368 } \
369 } STMT_END
370
371 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
372 STMT_START { \
373 const STRLEN glen = (in_len); \
374 STRLEN gl = glen; \
375 if (utf8) gl *= UTF8_EXPAND; \
376 if ((cur) + gl >= (start) + SvLEN(cat)) { \
377 *cur = '\0'; \
378 SvCUR_set((cat), (cur) - (start)); \
379 (start) = sv_exp_grow(cat, gl); \
380 (cur) = (start) + SvCUR(cat); \
381 } \
382 PUSH_BYTES(utf8, cur, buf, glen, 0); \
383 } STMT_END
384
385 #define PUSH_BYTE(utf8, s, byte) \
386 STMT_START { \
387 if (utf8) { \
388 const U8 au8 = (byte); \
389 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
390 } else *(U8 *)(s)++ = (byte); \
391 } STMT_END
392
393 /* Only to be used inside a loop (see the break) */
394 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
395 STMT_START { \
396 STRLEN retlen; \
397 if (str >= end) break; \
398 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
399 if (retlen == (STRLEN) -1 || retlen == 0) { \
400 *cur = '\0'; \
401 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
402 } \
403 str += retlen; \
404 } STMT_END
405
406 static const char *_action( const tempsym_t* symptr )
407 {
408 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
409 }
410
411 /* Returns the sizeof() struct described by pat */
412 STATIC I32
413 S_measure_struct(pTHX_ tempsym_t* symptr)
414 {
415 I32 total = 0;
416
417 PERL_ARGS_ASSERT_MEASURE_STRUCT;
418
419 while (next_symbol(symptr)) {
420 I32 len;
421 int size;
422
423 switch (symptr->howlen) {
424 case e_star:
425 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
426 _action( symptr ) );
427
428 default:
429 /* e_no_len and e_number */
430 len = symptr->length;
431 break;
432 }
433
434 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
435 if (!size) {
436 int star;
437 /* endianness doesn't influence the size of a type */
438 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
439 default:
440 Perl_croak(aTHX_ "Invalid type '%c' in %s",
441 (int)TYPE_NO_MODIFIERS(symptr->code),
442 _action( symptr ) );
443 case '.' | TYPE_IS_SHRIEKING:
444 case '@' | TYPE_IS_SHRIEKING:
445 case '@':
446 case '.':
447 case '/':
448 case 'U': /* XXXX Is it correct? */
449 case 'w':
450 case 'u':
451 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
452 (int) TYPE_NO_MODIFIERS(symptr->code),
453 _action( symptr ) );
454 case '%':
455 size = 0;
456 break;
457 case '(':
458 {
459 tempsym_t savsym = *symptr;
460 symptr->patptr = savsym.grpbeg;
461 symptr->patend = savsym.grpend;
462 /* XXXX Theoretically, we need to measure many times at
463 different positions, since the subexpression may contain
464 alignment commands, but be not of aligned length.
465 Need to detect this and croak(). */
466 size = measure_struct(symptr);
467 *symptr = savsym;
468 break;
469 }
470 case 'X' | TYPE_IS_SHRIEKING:
471 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
472 */
473 if (!len) /* Avoid division by 0 */
474 len = 1;
475 len = total % len; /* Assumed: the start is aligned. */
476 /* FALLTHROUGH */
477 case 'X':
478 size = -1;
479 if (total < len)
480 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
481 break;
482 case 'x' | TYPE_IS_SHRIEKING:
483 if (!len) /* Avoid division by 0 */
484 len = 1;
485 star = total % len; /* Assumed: the start is aligned. */
486 if (star) /* Other portable ways? */
487 len = len - star;
488 else
489 len = 0;
490 /* FALLTHROUGH */
491 case 'x':
492 case 'A':
493 case 'Z':
494 case 'a':
495 size = 1;
496 break;
497 case 'B':
498 case 'b':
499 len = (len + 7)/8;
500 size = 1;
501 break;
502 case 'H':
503 case 'h':
504 len = (len + 1)/2;
505 size = 1;
506 break;
507
508 case 'P':
509 len = 1;
510 size = sizeof(char*);
511 break;
512 }
513 }
514 total += len * size;
515 }
516 return total;
517 }
518
519
520 /* locate matching closing parenthesis or bracket
521 * returns char pointer to char after match, or NULL
522 */
523 STATIC const char *
524 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
525 {
526 PERL_ARGS_ASSERT_GROUP_END;
527
528 while (patptr < patend) {
529 const char c = *patptr++;
530
531 if (isSPACE(c))
532 continue;
533 else if (c == ender)
534 return patptr-1;
535 else if (c == '#') {
536 while (patptr < patend && *patptr != '\n')
537 patptr++;
538 continue;
539 } else if (c == '(')
540 patptr = group_end(patptr, patend, ')') + 1;
541 else if (c == '[')
542 patptr = group_end(patptr, patend, ']') + 1;
543 }
544 Perl_croak(aTHX_ "No group ending character '%c' found in template",
545 ender);
546 NOT_REACHED; /* NOTREACHED */
547 }
548
549
550 /* Convert unsigned decimal number to binary.
551 * Expects a pointer to the first digit and address of length variable
552 * Advances char pointer to 1st non-digit char and returns number
553 */
554 STATIC const char *
555 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
556 {
557 I32 len = *patptr++ - '0';
558
559 PERL_ARGS_ASSERT_GET_NUM;
560
561 while (isDIGIT(*patptr)) {
562 if (len >= 0x7FFFFFFF/10)
563 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
564 len = (len * 10) + (*patptr++ - '0');
565 }
566 *lenptr = len;
567 return patptr;
568 }
569
570 /* The marvellous template parsing routine: Using state stored in *symptr,
571 * locates next template code and count
572 */
573 STATIC bool
574 S_next_symbol(pTHX_ tempsym_t* symptr )
575 {
576 const char* patptr = symptr->patptr;
577 const char* const patend = symptr->patend;
578
579 PERL_ARGS_ASSERT_NEXT_SYMBOL;
580
581 symptr->flags &= ~FLAG_SLASH;
582
583 while (patptr < patend) {
584 if (isSPACE(*patptr))
585 patptr++;
586 else if (*patptr == '#') {
587 patptr++;
588 while (patptr < patend && *patptr != '\n')
589 patptr++;
590 if (patptr < patend)
591 patptr++;
592 } else {
593 /* We should have found a template code */
594 I32 code = *patptr++ & 0xFF;
595 U32 inherited_modifiers = 0;
596
597 if (code == ','){ /* grandfather in commas but with a warning */
598 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
599 symptr->flags |= FLAG_COMMA;
600 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
601 "Invalid type ',' in %s", _action( symptr ) );
602 }
603 continue;
604 }
605
606 /* for '(', skip to ')' */
607 if (code == '(') {
608 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
609 Perl_croak(aTHX_ "()-group starts with a count in %s",
610 _action( symptr ) );
611 symptr->grpbeg = patptr;
612 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
613 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
614 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
615 _action( symptr ) );
616 }
617
618 /* look for group modifiers to inherit */
619 if (TYPE_ENDIANNESS(symptr->flags)) {
620 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
621 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
622 }
623
624 /* look for modifiers */
625 while (patptr < patend) {
626 const char *allowed;
627 I32 modifier;
628 switch (*patptr) {
629 case '!':
630 modifier = TYPE_IS_SHRIEKING;
631 allowed = "sSiIlLxXnNvV@.";
632 break;
633 case '>':
634 modifier = TYPE_IS_BIG_ENDIAN;
635 allowed = ENDIANNESS_ALLOWED_TYPES;
636 break;
637 case '<':
638 modifier = TYPE_IS_LITTLE_ENDIAN;
639 allowed = ENDIANNESS_ALLOWED_TYPES;
640 break;
641 default:
642 allowed = "";
643 modifier = 0;
644 break;
645 }
646
647 if (modifier == 0)
648 break;
649
650 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
651 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
652 allowed, _action( symptr ) );
653
654 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
655 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
656 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
657 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
658 TYPE_ENDIANNESS_MASK)
659 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
660 *patptr, _action( symptr ) );
661
662 if ((code & modifier)) {
663 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
664 "Duplicate modifier '%c' after '%c' in %s",
665 *patptr, (int) TYPE_NO_MODIFIERS(code),
666 _action( symptr ) );
667 }
668
669 code |= modifier;
670 patptr++;
671 }
672
673 /* inherit modifiers */
674 code |= inherited_modifiers;
675
676 /* look for count and/or / */
677 if (patptr < patend) {
678 if (isDIGIT(*patptr)) {
679 patptr = get_num( patptr, &symptr->length );
680 symptr->howlen = e_number;
681
682 } else if (*patptr == '*') {
683 patptr++;
684 symptr->howlen = e_star;
685
686 } else if (*patptr == '[') {
687 const char* lenptr = ++patptr;
688 symptr->howlen = e_number;
689 patptr = group_end( patptr, patend, ']' ) + 1;
690 /* what kind of [] is it? */
691 if (isDIGIT(*lenptr)) {
692 lenptr = get_num( lenptr, &symptr->length );
693 if( *lenptr != ']' )
694 Perl_croak(aTHX_ "Malformed integer in [] in %s",
695 _action( symptr ) );
696 } else {
697 tempsym_t savsym = *symptr;
698 symptr->patend = patptr-1;
699 symptr->patptr = lenptr;
700 savsym.length = measure_struct(symptr);
701 *symptr = savsym;
702 }
703 } else {
704 symptr->howlen = e_no_len;
705 symptr->length = 1;
706 }
707
708 /* try to find / */
709 while (patptr < patend) {
710 if (isSPACE(*patptr))
711 patptr++;
712 else if (*patptr == '#') {
713 patptr++;
714 while (patptr < patend && *patptr != '\n')
715 patptr++;
716 if (patptr < patend)
717 patptr++;
718 } else {
719 if (*patptr == '/') {
720 symptr->flags |= FLAG_SLASH;
721 patptr++;
722 if (patptr < patend &&
723 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
724 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
725 _action( symptr ) );
726 }
727 break;
728 }
729 }
730 } else {
731 /* at end - no count, no / */
732 symptr->howlen = e_no_len;
733 symptr->length = 1;
734 }
735
736 symptr->code = code;
737 symptr->patptr = patptr;
738 return TRUE;
739 }
740 }
741 symptr->patptr = patptr;
742 return FALSE;
743 }
744
745 /*
746 There is no way to cleanly handle the case where we should process the
747 string per byte in its upgraded form while it's really in downgraded form
748 (e.g. estimates like strend-s as an upper bound for the number of
749 characters left wouldn't work). So if we foresee the need of this
750 (pattern starts with U or contains U0), we want to work on the encoded
751 version of the string. Users are advised to upgrade their pack string
752 themselves if they need to do a lot of unpacks like this on it
753 */
754 STATIC bool
755 need_utf8(const char *pat, const char *patend)
756 {
757 bool first = TRUE;
758
759 PERL_ARGS_ASSERT_NEED_UTF8;
760
761 while (pat < patend) {
762 if (pat[0] == '#') {
763 pat++;
764 pat = (const char *) memchr(pat, '\n', patend-pat);
765 if (!pat) return FALSE;
766 } else if (pat[0] == 'U') {
767 if (first || pat[1] == '0') return TRUE;
768 } else first = FALSE;
769 pat++;
770 }
771 return FALSE;
772 }
773
774 STATIC char
775 first_symbol(const char *pat, const char *patend) {
776 PERL_ARGS_ASSERT_FIRST_SYMBOL;
777
778 while (pat < patend) {
779 if (pat[0] != '#') return pat[0];
780 pat++;
781 pat = (const char *) memchr(pat, '\n', patend-pat);
782 if (!pat) return 0;
783 pat++;
784 }
785 return 0;
786 }
787
788 /*
789
790 =head1 Pack and Unpack
791
792 =for apidoc unpackstring
793
794 The engine implementing the unpack() Perl function.
795
796 Using the template pat..patend, this function unpacks the string
797 s..strend into a number of mortal SVs, which it pushes onto the perl
798 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
799 C<SPAGAIN> after the call to this function). It returns the number of
800 pushed elements.
801
802 The strend and patend pointers should point to the byte following the last
803 character of each string.
804
805 Although this function returns its values on the perl argument stack, it
806 doesn't take any parameters from that stack (and thus in particular
807 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
808 example).
809
810 =cut */
811
812 I32
813 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
814 {
815 tempsym_t sym;
816
817 PERL_ARGS_ASSERT_UNPACKSTRING;
818
819 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
820 else if (need_utf8(pat, patend)) {
821 /* We probably should try to avoid this in case a scalar context call
822 wouldn't get to the "U0" */
823 STRLEN len = strend - s;
824 s = (char *) bytes_to_utf8((U8 *) s, &len);
825 SAVEFREEPV(s);
826 strend = s + len;
827 flags |= FLAG_DO_UTF8;
828 }
829
830 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
831 flags |= FLAG_PARSE_UTF8;
832
833 TEMPSYM_INIT(&sym, pat, patend, flags);
834
835 return unpack_rec(&sym, s, s, strend, NULL );
836 }
837
838 STATIC I32
839 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
840 {
841 dSP;
842 SV *sv = NULL;
843 const I32 start_sp_offset = SP - PL_stack_base;
844 howlen_t howlen;
845 I32 checksum = 0;
846 UV cuv = 0;
847 NV cdouble = 0.0;
848 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
849 bool beyond = FALSE;
850 bool explicit_length;
851 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
852 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
853
854 PERL_ARGS_ASSERT_UNPACK_REC;
855
856 symptr->strbeg = s - strbeg;
857
858 while (next_symbol(symptr)) {
859 packprops_t props;
860 I32 len;
861 I32 datumtype = symptr->code;
862 bool needs_swap;
863 /* do first one only unless in list context
864 / is implemented by unpacking the count, then popping it from the
865 stack, so must check that we're not in the middle of a / */
866 if ( unpack_only_one
867 && (SP - PL_stack_base == start_sp_offset + 1)
868 && (datumtype != '/') ) /* XXX can this be omitted */
869 break;
870
871 switch (howlen = symptr->howlen) {
872 case e_star:
873 len = strend - strbeg; /* long enough */
874 break;
875 default:
876 /* e_no_len and e_number */
877 len = symptr->length;
878 break;
879 }
880
881 explicit_length = TRUE;
882 redo_switch:
883 beyond = s >= strend;
884
885 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
886 if (props) {
887 /* props nonzero means we can process this letter. */
888 const long size = props & PACK_SIZE_MASK;
889 const long howmany = (strend - s) / size;
890 if (len > howmany)
891 len = howmany;
892
893 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
894 if (len && unpack_only_one) len = 1;
895 EXTEND(SP, len);
896 EXTEND_MORTAL(len);
897 }
898 }
899
900 needs_swap = NEEDS_SWAP(datumtype);
901
902 switch(TYPE_NO_ENDIANNESS(datumtype)) {
903 default:
904 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
905
906 case '%':
907 if (howlen == e_no_len)
908 len = 16; /* len is not specified */
909 checksum = len;
910 cuv = 0;
911 cdouble = 0;
912 continue;
913
914 case '(':
915 {
916 tempsym_t savsym = *symptr;
917 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
918 symptr->flags |= group_modifiers;
919 symptr->patend = savsym.grpend;
920 symptr->previous = &savsym;
921 symptr->level++;
922 PUTBACK;
923 if (len && unpack_only_one) len = 1;
924 while (len--) {
925 symptr->patptr = savsym.grpbeg;
926 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
927 else symptr->flags &= ~FLAG_PARSE_UTF8;
928 unpack_rec(symptr, s, strbeg, strend, &s);
929 if (s == strend && savsym.howlen == e_star)
930 break; /* No way to continue */
931 }
932 SPAGAIN;
933 savsym.flags = symptr->flags & ~group_modifiers;
934 *symptr = savsym;
935 break;
936 }
937 case '.' | TYPE_IS_SHRIEKING:
938 case '.': {
939 const char *from;
940 SV *sv;
941 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
942 if (howlen == e_star) from = strbeg;
943 else if (len <= 0) from = s;
944 else {
945 tempsym_t *group = symptr;
946
947 while (--len && group) group = group->previous;
948 from = group ? strbeg + group->strbeg : strbeg;
949 }
950 sv = from <= s ?
951 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
952 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
953 mXPUSHs(sv);
954 break;
955 }
956 case '@' | TYPE_IS_SHRIEKING:
957 case '@':
958 s = strbeg + symptr->strbeg;
959 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
960 {
961 while (len > 0) {
962 if (s >= strend)
963 Perl_croak(aTHX_ "'@' outside of string in unpack");
964 s += UTF8SKIP(s);
965 len--;
966 }
967 if (s > strend)
968 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
969 } else {
970 if (strend-s < len)
971 Perl_croak(aTHX_ "'@' outside of string in unpack");
972 s += len;
973 }
974 break;
975 case 'X' | TYPE_IS_SHRIEKING:
976 if (!len) /* Avoid division by 0 */
977 len = 1;
978 if (utf8) {
979 const char *hop, *last;
980 I32 l = len;
981 hop = last = strbeg;
982 while (hop < s) {
983 hop += UTF8SKIP(hop);
984 if (--l == 0) {
985 last = hop;
986 l = len;
987 }
988 }
989 if (last > s)
990 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
991 s = last;
992 break;
993 }
994 len = (s - strbeg) % len;
995 /* FALLTHROUGH */
996 case 'X':
997 if (utf8) {
998 while (len > 0) {
999 if (s <= strbeg)
1000 Perl_croak(aTHX_ "'X' outside of string in unpack");
1001 while (--s, UTF8_IS_CONTINUATION(*s)) {
1002 if (s <= strbeg)
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
1004 }
1005 len--;
1006 }
1007 } else {
1008 if (len > s - strbeg)
1009 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1010 s -= len;
1011 }
1012 break;
1013 case 'x' | TYPE_IS_SHRIEKING: {
1014 I32 ai32;
1015 if (!len) /* Avoid division by 0 */
1016 len = 1;
1017 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1018 else ai32 = (s - strbeg) % len;
1019 if (ai32 == 0) break;
1020 len -= ai32;
1021 }
1022 /* FALLTHROUGH */
1023 case 'x':
1024 if (utf8) {
1025 while (len>0) {
1026 if (s >= strend)
1027 Perl_croak(aTHX_ "'x' outside of string in unpack");
1028 s += UTF8SKIP(s);
1029 len--;
1030 }
1031 } else {
1032 if (len > strend - s)
1033 Perl_croak(aTHX_ "'x' outside of string in unpack");
1034 s += len;
1035 }
1036 break;
1037 case '/':
1038 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1039
1040 case 'A':
1041 case 'Z':
1042 case 'a':
1043 if (checksum) {
1044 /* Preliminary length estimate is assumed done in 'W' */
1045 if (len > strend - s) len = strend - s;
1046 goto W_checksum;
1047 }
1048 if (utf8) {
1049 I32 l;
1050 const char *hop;
1051 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1052 if (hop >= strend) {
1053 if (hop > strend)
1054 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1055 break;
1056 }
1057 }
1058 if (hop > strend)
1059 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1060 len = hop - s;
1061 } else if (len > strend - s)
1062 len = strend - s;
1063
1064 if (datumtype == 'Z') {
1065 /* 'Z' strips stuff after first null */
1066 const char *ptr, *end;
1067 end = s + len;
1068 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1069 sv = newSVpvn(s, ptr-s);
1070 if (howlen == e_star) /* exact for 'Z*' */
1071 len = ptr-s + (ptr != strend ? 1 : 0);
1072 } else if (datumtype == 'A') {
1073 /* 'A' strips both nulls and spaces */
1074 const char *ptr;
1075 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1076 for (ptr = s+len-1; ptr >= s; ptr--)
1077 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1078 !isSPACE_utf8(ptr)) break;
1079 if (ptr >= s) ptr += UTF8SKIP(ptr);
1080 else ptr++;
1081 if (ptr > s+len)
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 } else {
1084 for (ptr = s+len-1; ptr >= s; ptr--)
1085 if (*ptr != 0 && !isSPACE(*ptr)) break;
1086 ptr++;
1087 }
1088 sv = newSVpvn(s, ptr-s);
1089 } else sv = newSVpvn(s, len);
1090
1091 if (utf8) {
1092 SvUTF8_on(sv);
1093 /* Undo any upgrade done due to need_utf8() */
1094 if (!(symptr->flags & FLAG_WAS_UTF8))
1095 sv_utf8_downgrade(sv, 0);
1096 }
1097 mXPUSHs(sv);
1098 s += len;
1099 break;
1100 case 'B':
1101 case 'b': {
1102 char *str;
1103 if (howlen == e_star || len > (strend - s) * 8)
1104 len = (strend - s) * 8;
1105 if (checksum) {
1106 if (utf8)
1107 while (len >= 8 && s < strend) {
1108 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1109 len -= 8;
1110 }
1111 else
1112 while (len >= 8) {
1113 cuv += PL_bitcount[*(U8 *)s++];
1114 len -= 8;
1115 }
1116 if (len && s < strend) {
1117 U8 bits;
1118 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1119 if (datumtype == 'b')
1120 while (len-- > 0) {
1121 if (bits & 1) cuv++;
1122 bits >>= 1;
1123 }
1124 else
1125 while (len-- > 0) {
1126 if (bits & 0x80) cuv++;
1127 bits <<= 1;
1128 }
1129 }
1130 break;
1131 }
1132
1133 sv = sv_2mortal(newSV(len ? len : 1));
1134 SvPOK_on(sv);
1135 str = SvPVX(sv);
1136 if (datumtype == 'b') {
1137 U8 bits = 0;
1138 const I32 ai32 = len;
1139 for (len = 0; len < ai32; len++) {
1140 if (len & 7) bits >>= 1;
1141 else if (utf8) {
1142 if (s >= strend) break;
1143 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1144 } else bits = *(U8 *) s++;
1145 *str++ = bits & 1 ? '1' : '0';
1146 }
1147 } else {
1148 U8 bits = 0;
1149 const I32 ai32 = len;
1150 for (len = 0; len < ai32; len++) {
1151 if (len & 7) bits <<= 1;
1152 else if (utf8) {
1153 if (s >= strend) break;
1154 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1155 } else bits = *(U8 *) s++;
1156 *str++ = bits & 0x80 ? '1' : '0';
1157 }
1158 }
1159 *str = '\0';
1160 SvCUR_set(sv, str - SvPVX_const(sv));
1161 XPUSHs(sv);
1162 break;
1163 }
1164 case 'H':
1165 case 'h': {
1166 char *str = NULL;
1167 /* Preliminary length estimate, acceptable for utf8 too */
1168 if (howlen == e_star || len > (strend - s) * 2)
1169 len = (strend - s) * 2;
1170 if (!checksum) {
1171 sv = sv_2mortal(newSV(len ? len : 1));
1172 SvPOK_on(sv);
1173 str = SvPVX(sv);
1174 }
1175 if (datumtype == 'h') {
1176 U8 bits = 0;
1177 I32 ai32 = len;
1178 for (len = 0; len < ai32; len++) {
1179 if (len & 1) bits >>= 4;
1180 else if (utf8) {
1181 if (s >= strend) break;
1182 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1183 } else bits = * (U8 *) s++;
1184 if (!checksum)
1185 *str++ = PL_hexdigit[bits & 15];
1186 }
1187 } else {
1188 U8 bits = 0;
1189 const I32 ai32 = len;
1190 for (len = 0; len < ai32; len++) {
1191 if (len & 1) bits <<= 4;
1192 else if (utf8) {
1193 if (s >= strend) break;
1194 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1195 } else bits = *(U8 *) s++;
1196 if (!checksum)
1197 *str++ = PL_hexdigit[(bits >> 4) & 15];
1198 }
1199 }
1200 if (!checksum) {
1201 *str = '\0';
1202 SvCUR_set(sv, str - SvPVX_const(sv));
1203 XPUSHs(sv);
1204 }
1205 break;
1206 }
1207 case 'C':
1208 if (len == 0) {
1209 if (explicit_length)
1210 /* Switch to "character" mode */
1211 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1212 break;
1213 }
1214 /* FALLTHROUGH */
1215 case 'c':
1216 while (len-- > 0 && s < strend) {
1217 int aint;
1218 if (utf8)
1219 {
1220 STRLEN retlen;
1221 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1222 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1223 if (retlen == (STRLEN) -1 || retlen == 0)
1224 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1225 s += retlen;
1226 }
1227 else
1228 aint = *(U8 *)(s)++;
1229 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1230 aint -= 256;
1231 if (!checksum)
1232 mPUSHi(aint);
1233 else if (checksum > bits_in_uv)
1234 cdouble += (NV)aint;
1235 else
1236 cuv += aint;
1237 }
1238 break;
1239 case 'W':
1240 W_checksum:
1241 if (utf8) {
1242 while (len-- > 0 && s < strend) {
1243 STRLEN retlen;
1244 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 s += retlen;
1249 if (!checksum)
1250 mPUSHu(val);
1251 else if (checksum > bits_in_uv)
1252 cdouble += (NV) val;
1253 else
1254 cuv += val;
1255 }
1256 } else if (!checksum)
1257 while (len-- > 0) {
1258 const U8 ch = *(U8 *) s++;
1259 mPUSHu(ch);
1260 }
1261 else if (checksum > bits_in_uv)
1262 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1263 else
1264 while (len-- > 0) cuv += *(U8 *) s++;
1265 break;
1266 case 'U':
1267 if (len == 0) {
1268 if (explicit_length && howlen != e_star) {
1269 /* Switch to "bytes in UTF-8" mode */
1270 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1271 else
1272 /* Should be impossible due to the need_utf8() test */
1273 Perl_croak(aTHX_ "U0 mode on a byte string");
1274 }
1275 break;
1276 }
1277 if (len > strend - s) len = strend - s;
1278 if (!checksum) {
1279 if (len && unpack_only_one) len = 1;
1280 EXTEND(SP, len);
1281 EXTEND_MORTAL(len);
1282 }
1283 while (len-- > 0 && s < strend) {
1284 STRLEN retlen;
1285 UV auv;
1286 if (utf8) {
1287 U8 result[UTF8_MAXLEN];
1288 const char *ptr = s;
1289 STRLEN len;
1290 /* Bug: warns about bad utf8 even if we are short on bytes
1291 and will break out of the loop */
1292 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1293 'U'))
1294 break;
1295 len = UTF8SKIP(result);
1296 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1297 (char *) &result[1], len-1, 'U')) break;
1298 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1299 len,
1300 &retlen,
1301 UTF8_ALLOW_DEFAULT));
1302 s = ptr;
1303 } else {
1304 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1305 strend - s,
1306 &retlen,
1307 UTF8_ALLOW_DEFAULT));
1308 if (retlen == (STRLEN) -1 || retlen == 0)
1309 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1310 s += retlen;
1311 }
1312 if (!checksum)
1313 mPUSHu(auv);
1314 else if (checksum > bits_in_uv)
1315 cdouble += (NV) auv;
1316 else
1317 cuv += auv;
1318 }
1319 break;
1320 case 's' | TYPE_IS_SHRIEKING:
1321 #if SHORTSIZE != SIZE16
1322 while (len-- > 0) {
1323 short ashort;
1324 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1325 if (!checksum)
1326 mPUSHi(ashort);
1327 else if (checksum > bits_in_uv)
1328 cdouble += (NV)ashort;
1329 else
1330 cuv += ashort;
1331 }
1332 break;
1333 #else
1334 /* FALLTHROUGH */
1335 #endif
1336 case 's':
1337 while (len-- > 0) {
1338 I16 ai16;
1339
1340 #if U16SIZE > SIZE16
1341 ai16 = 0;
1342 #endif
1343 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1344 #if U16SIZE > SIZE16
1345 if (ai16 > 32767)
1346 ai16 -= 65536;
1347 #endif
1348 if (!checksum)
1349 mPUSHi(ai16);
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ai16;
1352 else
1353 cuv += ai16;
1354 }
1355 break;
1356 case 'S' | TYPE_IS_SHRIEKING:
1357 #if SHORTSIZE != SIZE16
1358 while (len-- > 0) {
1359 unsigned short aushort;
1360 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1361 needs_swap);
1362 if (!checksum)
1363 mPUSHu(aushort);
1364 else if (checksum > bits_in_uv)
1365 cdouble += (NV)aushort;
1366 else
1367 cuv += aushort;
1368 }
1369 break;
1370 #else
1371 /* FALLTHROUGH */
1372 #endif
1373 case 'v':
1374 case 'n':
1375 case 'S':
1376 while (len-- > 0) {
1377 U16 au16;
1378 #if U16SIZE > SIZE16
1379 au16 = 0;
1380 #endif
1381 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1382 if (datumtype == 'n')
1383 au16 = PerlSock_ntohs(au16);
1384 if (datumtype == 'v')
1385 au16 = vtohs(au16);
1386 if (!checksum)
1387 mPUSHu(au16);
1388 else if (checksum > bits_in_uv)
1389 cdouble += (NV) au16;
1390 else
1391 cuv += au16;
1392 }
1393 break;
1394 case 'v' | TYPE_IS_SHRIEKING:
1395 case 'n' | TYPE_IS_SHRIEKING:
1396 while (len-- > 0) {
1397 I16 ai16;
1398 # if U16SIZE > SIZE16
1399 ai16 = 0;
1400 # endif
1401 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1402 /* There should never be any byte-swapping here. */
1403 assert(!TYPE_ENDIANNESS(datumtype));
1404 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1405 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1406 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1407 ai16 = (I16) vtohs((U16) ai16);
1408 if (!checksum)
1409 mPUSHi(ai16);
1410 else if (checksum > bits_in_uv)
1411 cdouble += (NV) ai16;
1412 else
1413 cuv += ai16;
1414 }
1415 break;
1416 case 'i':
1417 case 'i' | TYPE_IS_SHRIEKING:
1418 while (len-- > 0) {
1419 int aint;
1420 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1421 if (!checksum)
1422 mPUSHi(aint);
1423 else if (checksum > bits_in_uv)
1424 cdouble += (NV)aint;
1425 else
1426 cuv += aint;
1427 }
1428 break;
1429 case 'I':
1430 case 'I' | TYPE_IS_SHRIEKING:
1431 while (len-- > 0) {
1432 unsigned int auint;
1433 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1434 if (!checksum)
1435 mPUSHu(auint);
1436 else if (checksum > bits_in_uv)
1437 cdouble += (NV)auint;
1438 else
1439 cuv += auint;
1440 }
1441 break;
1442 case 'j':
1443 while (len-- > 0) {
1444 IV aiv;
1445 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1446 if (!checksum)
1447 mPUSHi(aiv);
1448 else if (checksum > bits_in_uv)
1449 cdouble += (NV)aiv;
1450 else
1451 cuv += aiv;
1452 }
1453 break;
1454 case 'J':
1455 while (len-- > 0) {
1456 UV auv;
1457 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1458 if (!checksum)
1459 mPUSHu(auv);
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)auv;
1462 else
1463 cuv += auv;
1464 }
1465 break;
1466 case 'l' | TYPE_IS_SHRIEKING:
1467 #if LONGSIZE != SIZE32
1468 while (len-- > 0) {
1469 long along;
1470 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1471 if (!checksum)
1472 mPUSHi(along);
1473 else if (checksum > bits_in_uv)
1474 cdouble += (NV)along;
1475 else
1476 cuv += along;
1477 }
1478 break;
1479 #else
1480 /* FALLTHROUGH */
1481 #endif
1482 case 'l':
1483 while (len-- > 0) {
1484 I32 ai32;
1485 #if U32SIZE > SIZE32
1486 ai32 = 0;
1487 #endif
1488 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1489 #if U32SIZE > SIZE32
1490 if (ai32 > 2147483647) ai32 -= 4294967296;
1491 #endif
1492 if (!checksum)
1493 mPUSHi(ai32);
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)ai32;
1496 else
1497 cuv += ai32;
1498 }
1499 break;
1500 case 'L' | TYPE_IS_SHRIEKING:
1501 #if LONGSIZE != SIZE32
1502 while (len-- > 0) {
1503 unsigned long aulong;
1504 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1505 if (!checksum)
1506 mPUSHu(aulong);
1507 else if (checksum > bits_in_uv)
1508 cdouble += (NV)aulong;
1509 else
1510 cuv += aulong;
1511 }
1512 break;
1513 #else
1514 /* FALLTHROUGH */
1515 #endif
1516 case 'V':
1517 case 'N':
1518 case 'L':
1519 while (len-- > 0) {
1520 U32 au32;
1521 #if U32SIZE > SIZE32
1522 au32 = 0;
1523 #endif
1524 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1525 if (datumtype == 'N')
1526 au32 = PerlSock_ntohl(au32);
1527 if (datumtype == 'V')
1528 au32 = vtohl(au32);
1529 if (!checksum)
1530 mPUSHu(au32);
1531 else if (checksum > bits_in_uv)
1532 cdouble += (NV)au32;
1533 else
1534 cuv += au32;
1535 }
1536 break;
1537 case 'V' | TYPE_IS_SHRIEKING:
1538 case 'N' | TYPE_IS_SHRIEKING:
1539 while (len-- > 0) {
1540 I32 ai32;
1541 #if U32SIZE > SIZE32
1542 ai32 = 0;
1543 #endif
1544 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1545 /* There should never be any byte swapping here. */
1546 assert(!TYPE_ENDIANNESS(datumtype));
1547 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1548 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1549 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1550 ai32 = (I32)vtohl((U32)ai32);
1551 if (!checksum)
1552 mPUSHi(ai32);
1553 else if (checksum > bits_in_uv)
1554 cdouble += (NV)ai32;
1555 else
1556 cuv += ai32;
1557 }
1558 break;
1559 case 'p':
1560 while (len-- > 0) {
1561 const char *aptr;
1562 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1563 /* newSVpv generates undef if aptr is NULL */
1564 mPUSHs(newSVpv(aptr, 0));
1565 }
1566 break;
1567 case 'w':
1568 {
1569 UV auv = 0;
1570 U32 bytes = 0;
1571
1572 while (len > 0 && s < strend) {
1573 U8 ch;
1574 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1575 auv = (auv << 7) | (ch & 0x7f);
1576 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1577 if (ch < 0x80) {
1578 bytes = 0;
1579 mPUSHu(auv);
1580 len--;
1581 auv = 0;
1582 continue;
1583 }
1584 if (++bytes >= sizeof(UV)) { /* promote to string */
1585 const char *t;
1586
1587 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1588 while (s < strend) {
1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1590 sv = mul128(sv, (U8)(ch & 0x7f));
1591 if (!(ch & 0x80)) {
1592 bytes = 0;
1593 break;
1594 }
1595 }
1596 t = SvPV_nolen_const(sv);
1597 while (*t == '0')
1598 t++;
1599 sv_chop(sv, t);
1600 mPUSHs(sv);
1601 len--;
1602 auv = 0;
1603 }
1604 }
1605 if ((s >= strend) && bytes)
1606 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1607 }
1608 break;
1609 case 'P':
1610 if (symptr->howlen == e_star)
1611 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1612 EXTEND(SP, 1);
1613 if (s + sizeof(char*) <= strend) {
1614 char *aptr;
1615 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1616 /* newSVpvn generates undef if aptr is NULL */
1617 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1618 }
1619 break;
1620 #if defined(HAS_QUAD) && IVSIZE >= 8
1621 case 'q':
1622 while (len-- > 0) {
1623 Quad_t aquad;
1624 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1625 if (!checksum)
1626 mPUSHs(newSViv((IV)aquad));
1627 else if (checksum > bits_in_uv)
1628 cdouble += (NV)aquad;
1629 else
1630 cuv += aquad;
1631 }
1632 break;
1633 case 'Q':
1634 while (len-- > 0) {
1635 Uquad_t auquad;
1636 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1637 if (!checksum)
1638 mPUSHs(newSVuv((UV)auquad));
1639 else if (checksum > bits_in_uv)
1640 cdouble += (NV)auquad;
1641 else
1642 cuv += auquad;
1643 }
1644 break;
1645 #endif
1646 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1647 case 'f':
1648 while (len-- > 0) {
1649 float afloat;
1650 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1651 if (!checksum)
1652 mPUSHn(afloat);
1653 else
1654 cdouble += afloat;
1655 }
1656 break;
1657 case 'd':
1658 while (len-- > 0) {
1659 double adouble;
1660 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1661 if (!checksum)
1662 mPUSHn(adouble);
1663 else
1664 cdouble += adouble;
1665 }
1666 break;
1667 case 'F':
1668 while (len-- > 0) {
1669 NV_bytes anv;
1670 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1671 datumtype, needs_swap);
1672 if (!checksum)
1673 mPUSHn(anv.nv);
1674 else
1675 cdouble += anv.nv;
1676 }
1677 break;
1678 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1679 case 'D':
1680 while (len-- > 0) {
1681 ld_bytes aldouble;
1682 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1683 sizeof(aldouble.bytes), datumtype, needs_swap);
1684 /* The most common long double format, the x86 80-bit
1685 * extended precision, has either 2 or 6 unused bytes,
1686 * which may contain garbage, which may contain
1687 * unintentional data. While we do zero the bytes of
1688 * the long double data in pack(), here in unpack() we
1689 * don't, because it's really hard to envision that
1690 * reading the long double off aldouble would be
1691 * affected by the unused bytes.
1692 *
1693 * Note that trying to unpack 'long doubles' of 'long
1694 * doubles' packed in another system is in the general
1695 * case doomed without having more detail. */
1696 if (!checksum)
1697 mPUSHn(aldouble.ld);
1698 else
1699 cdouble += aldouble.ld;
1700 }
1701 break;
1702 #endif
1703 case 'u':
1704 if (!checksum) {
1705 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1706 sv = sv_2mortal(newSV(l));
1707 if (l) SvPOK_on(sv);
1708 }
1709
1710 /* Note that all legal uuencoded strings are ASCII printables, so
1711 * have the same representation under UTF-8 vs not. This means we
1712 * can ignore UTF8ness on legal input. For illegal we stop at the
1713 * first failure, and don't report where/what that is, so again we
1714 * can ignore UTF8ness */
1715
1716 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1717 I32 a, b, c, d;
1718 char hunk[3];
1719
1720 len = PL_uudmap[*(U8*)s++] & 077;
1721 while (len > 0) {
1722 if (s < strend && ISUUCHAR(*s))
1723 a = PL_uudmap[*(U8*)s++] & 077;
1724 else
1725 a = 0;
1726 if (s < strend && ISUUCHAR(*s))
1727 b = PL_uudmap[*(U8*)s++] & 077;
1728 else
1729 b = 0;
1730 if (s < strend && ISUUCHAR(*s))
1731 c = PL_uudmap[*(U8*)s++] & 077;
1732 else
1733 c = 0;
1734 if (s < strend && ISUUCHAR(*s))
1735 d = PL_uudmap[*(U8*)s++] & 077;
1736 else
1737 d = 0;
1738 hunk[0] = (char)((a << 2) | (b >> 4));
1739 hunk[1] = (char)((b << 4) | (c >> 2));
1740 hunk[2] = (char)((c << 6) | d);
1741 if (!checksum)
1742 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1743 len -= 3;
1744 }
1745 if (*s == '\n')
1746 s++;
1747 else /* possible checksum byte */
1748 if (s + 1 < strend && s[1] == '\n')
1749 s += 2;
1750 }
1751 if (!checksum)
1752 XPUSHs(sv);
1753 break;
1754 } /* End of switch */
1755
1756 if (checksum) {
1757 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1758 (checksum > bits_in_uv &&
1759 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1760 NV trouble, anv;
1761
1762 anv = (NV) (1 << (checksum & 15));
1763 while (checksum >= 16) {
1764 checksum -= 16;
1765 anv *= 65536.0;
1766 }
1767 while (cdouble < 0.0)
1768 cdouble += anv;
1769 cdouble = Perl_modf(cdouble / anv, &trouble);
1770 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1771 /* Workaround for powerpc doubledouble modfl bug:
1772 * close to 1.0L and -1.0L cdouble is 0, and trouble
1773 * is cdouble / anv. */
1774 if (trouble != Perl_ceil(trouble)) {
1775 cdouble = trouble;
1776 if (cdouble > 1.0L) cdouble -= 1.0L;
1777 if (cdouble < -1.0L) cdouble += 1.0L;
1778 }
1779 #endif
1780 cdouble *= anv;
1781 sv = newSVnv(cdouble);
1782 }
1783 else {
1784 if (checksum < bits_in_uv) {
1785 UV mask = ((UV)1 << checksum) - 1;
1786 cuv &= mask;
1787 }
1788 sv = newSVuv(cuv);
1789 }
1790 mXPUSHs(sv);
1791 checksum = 0;
1792 }
1793
1794 if (symptr->flags & FLAG_SLASH){
1795 if (SP - PL_stack_base - start_sp_offset <= 0)
1796 break;
1797 if( next_symbol(symptr) ){
1798 if( symptr->howlen == e_number )
1799 Perl_croak(aTHX_ "Count after length/code in unpack" );
1800 if( beyond ){
1801 /* ...end of char buffer then no decent length available */
1802 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1803 } else {
1804 /* take top of stack (hope it's numeric) */
1805 len = POPi;
1806 if( len < 0 )
1807 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1808 }
1809 } else {
1810 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1811 }
1812 datumtype = symptr->code;
1813 explicit_length = FALSE;
1814 goto redo_switch;
1815 }
1816 }
1817
1818 if (new_s)
1819 *new_s = s;
1820 PUTBACK;
1821 return SP - PL_stack_base - start_sp_offset;
1822 }
1823
1824 PP(pp_unpack)
1825 {
1826 dSP;
1827 dPOPPOPssrl;
1828 I32 gimme = GIMME_V;
1829 STRLEN llen;
1830 STRLEN rlen;
1831 const char *pat = SvPV_const(left, llen);
1832 const char *s = SvPV_const(right, rlen);
1833 const char *strend = s + rlen;
1834 const char *patend = pat + llen;
1835 I32 cnt;
1836
1837 PUTBACK;
1838 cnt = unpackstring(pat, patend, s, strend,
1839 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1840 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1841
1842 SPAGAIN;
1843 if ( !cnt && gimme == G_SCALAR )
1844 PUSHs(&PL_sv_undef);
1845 RETURN;
1846 }
1847
1848 STATIC U8 *
1849 doencodes(U8 *h, const U8 *s, I32 len)
1850 {
1851 *h++ = PL_uuemap[len];
1852 while (len > 2) {
1853 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1854 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1855 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1856 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1857 s += 3;
1858 len -= 3;
1859 }
1860 if (len > 0) {
1861 const U8 r = (len > 1 ? s[1] : '\0');
1862 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1863 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1864 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1865 *h++ = PL_uuemap[0];
1866 }
1867 *h++ = '\n';
1868 return h;
1869 }
1870
1871 STATIC SV *
1872 S_is_an_int(pTHX_ const char *s, STRLEN l)
1873 {
1874 SV *result = newSVpvn(s, l);
1875 char *const result_c = SvPV_nolen(result); /* convenience */
1876 char *out = result_c;
1877 bool skip = 1;
1878 bool ignore = 0;
1879
1880 PERL_ARGS_ASSERT_IS_AN_INT;
1881
1882 while (*s) {
1883 switch (*s) {
1884 case ' ':
1885 break;
1886 case '+':
1887 if (!skip) {
1888 SvREFCNT_dec(result);
1889 return (NULL);
1890 }
1891 break;
1892 case '0':
1893 case '1':
1894 case '2':
1895 case '3':
1896 case '4':
1897 case '5':
1898 case '6':
1899 case '7':
1900 case '8':
1901 case '9':
1902 skip = 0;
1903 if (!ignore) {
1904 *(out++) = *s;
1905 }
1906 break;
1907 case '.':
1908 ignore = 1;
1909 break;
1910 default:
1911 SvREFCNT_dec(result);
1912 return (NULL);
1913 }
1914 s++;
1915 }
1916 *(out++) = '\0';
1917 SvCUR_set(result, out - result_c);
1918 return (result);
1919 }
1920
1921 /* pnum must be '\0' terminated */
1922 STATIC int
1923 S_div128(pTHX_ SV *pnum, bool *done)
1924 {
1925 STRLEN len;
1926 char * const s = SvPV(pnum, len);
1927 char *t = s;
1928 int m = 0;
1929
1930 PERL_ARGS_ASSERT_DIV128;
1931
1932 *done = 1;
1933 while (*t) {
1934 const int i = m * 10 + (*t - '0');
1935 const int r = (i >> 7); /* r < 10 */
1936 m = i & 0x7F;
1937 if (r) {
1938 *done = 0;
1939 }
1940 *(t++) = '0' + r;
1941 }
1942 *(t++) = '\0';
1943 SvCUR_set(pnum, (STRLEN) (t - s));
1944 return (m);
1945 }
1946
1947 /*
1948 =for apidoc packlist
1949
1950 The engine implementing pack() Perl function.
1951
1952 =cut
1953 */
1954
1955 void
1956 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1957 {
1958 tempsym_t sym;
1959
1960 PERL_ARGS_ASSERT_PACKLIST;
1961
1962 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1963
1964 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1965 Also make sure any UTF8 flag is loaded */
1966 SvPV_force_nolen(cat);
1967 if (DO_UTF8(cat))
1968 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1969
1970 (void)pack_rec( cat, &sym, beglist, endlist );
1971 }
1972
1973 /* like sv_utf8_upgrade, but also repoint the group start markers */
1974 STATIC void
1975 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1976 STRLEN len;
1977 tempsym_t *group;
1978 const char *from_ptr, *from_start, *from_end, **marks, **m;
1979 char *to_start, *to_ptr;
1980
1981 if (SvUTF8(sv)) return;
1982
1983 from_start = SvPVX_const(sv);
1984 from_end = from_start + SvCUR(sv);
1985 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1986 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
1987 if (from_ptr == from_end) {
1988 /* Simple case: no character needs to be changed */
1989 SvUTF8_on(sv);
1990 return;
1991 }
1992
1993 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
1994 Newx(to_start, len, char);
1995 Copy(from_start, to_start, from_ptr-from_start, char);
1996 to_ptr = to_start + (from_ptr-from_start);
1997
1998 Newx(marks, sym_ptr->level+2, const char *);
1999 for (group=sym_ptr; group; group = group->previous)
2000 marks[group->level] = from_start + group->strbeg;
2001 marks[sym_ptr->level+1] = from_end+1;
2002 for (m = marks; *m < from_ptr; m++)
2003 *m = to_start + (*m-from_start);
2004
2005 for (;from_ptr < from_end; from_ptr++) {
2006 while (*m == from_ptr) *m++ = to_ptr;
2007 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2008 }
2009 *to_ptr = 0;
2010
2011 while (*m == from_ptr) *m++ = to_ptr;
2012 if (m != marks + sym_ptr->level+1) {
2013 Safefree(marks);
2014 Safefree(to_start);
2015 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2016 "level=%d", m, marks, sym_ptr->level);
2017 }
2018 for (group=sym_ptr; group; group = group->previous)
2019 group->strbeg = marks[group->level] - to_start;
2020 Safefree(marks);
2021
2022 if (SvOOK(sv)) {
2023 if (SvIVX(sv)) {
2024 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2025 from_start -= SvIVX(sv);
2026 SvIV_set(sv, 0);
2027 }
2028 SvFLAGS(sv) &= ~SVf_OOK;
2029 }
2030 if (SvLEN(sv) != 0)
2031 Safefree(from_start);
2032 SvPV_set(sv, to_start);
2033 SvCUR_set(sv, to_ptr - to_start);
2034 SvLEN_set(sv, len);
2035 SvUTF8_on(sv);
2036 }
2037
2038 /* Exponential string grower. Makes string extension effectively O(n)
2039 needed says how many extra bytes we need (not counting the final '\0')
2040 Only grows the string if there is an actual lack of space
2041 */
2042 STATIC char *
2043 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2044 const STRLEN cur = SvCUR(sv);
2045 const STRLEN len = SvLEN(sv);
2046 STRLEN extend;
2047
2048 PERL_ARGS_ASSERT_SV_EXP_GROW;
2049
2050 if (len - cur > needed) return SvPVX(sv);
2051 extend = needed > len ? needed : len;
2052 return SvGROW(sv, len+extend+1);
2053 }
2054
2055 static SV *
2056 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2057 {
2058 SvGETMAGIC(sv);
2059 if (UNLIKELY(SvAMAGIC(sv)))
2060 sv = sv_2num(sv);
2061 if (UNLIKELY(isinfnansv(sv))) {
2062 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2063 const NV nv = SvNV_nomg(sv);
2064 if (c == 'w')
2065 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2066 else
2067 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2068 }
2069 return sv;
2070 }
2071
2072 #define SvIV_no_inf(sv,d) \
2073 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2074 #define SvUV_no_inf(sv,d) \
2075 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2076
2077 STATIC
2078 SV **
2079 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2080 {
2081 tempsym_t lookahead;
2082 I32 items = endlist - beglist;
2083 bool found = next_symbol(symptr);
2084 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2085 bool warn_utf8 = ckWARN(WARN_UTF8);
2086 char* from;
2087
2088 PERL_ARGS_ASSERT_PACK_REC;
2089
2090 if (symptr->level == 0 && found && symptr->code == 'U') {
2091 marked_upgrade(aTHX_ cat, symptr);
2092 symptr->flags |= FLAG_DO_UTF8;
2093 utf8 = 0;
2094 }
2095 symptr->strbeg = SvCUR(cat);
2096
2097 while (found) {
2098 SV *fromstr;
2099 STRLEN fromlen;
2100 I32 len;
2101 SV *lengthcode = NULL;
2102 I32 datumtype = symptr->code;
2103 howlen_t howlen = symptr->howlen;
2104 char *start = SvPVX(cat);
2105 char *cur = start + SvCUR(cat);
2106 bool needs_swap;
2107
2108 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2109 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2110
2111 switch (howlen) {
2112 case e_star:
2113 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2114 0 : items;
2115 break;
2116 default:
2117 /* e_no_len and e_number */
2118 len = symptr->length;
2119 break;
2120 }
2121
2122 if (len) {
2123 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2124
2125 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2126 /* We can process this letter. */
2127 STRLEN size = props & PACK_SIZE_MASK;
2128 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2129 }
2130 }
2131
2132 /* Look ahead for next symbol. Do we have code/code? */
2133 lookahead = *symptr;
2134 found = next_symbol(&lookahead);
2135 if (symptr->flags & FLAG_SLASH) {
2136 IV count;
2137 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2138 if (strchr("aAZ", lookahead.code)) {
2139 if (lookahead.howlen == e_number) count = lookahead.length;
2140 else {
2141 if (items > 0) {
2142 count = sv_len_utf8(*beglist);
2143 }
2144 else count = 0;
2145 if (lookahead.code == 'Z') count++;
2146 }
2147 } else {
2148 if (lookahead.howlen == e_number && lookahead.length < items)
2149 count = lookahead.length;
2150 else count = items;
2151 }
2152 lookahead.howlen = e_number;
2153 lookahead.length = count;
2154 lengthcode = sv_2mortal(newSViv(count));
2155 }
2156
2157 needs_swap = NEEDS_SWAP(datumtype);
2158
2159 /* Code inside the switch must take care to properly update
2160 cat (CUR length and '\0' termination) if it updated *cur and
2161 doesn't simply leave using break */
2162 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2163 default:
2164 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2165 (int) TYPE_NO_MODIFIERS(datumtype));
2166 case '%':
2167 Perl_croak(aTHX_ "'%%' may not be used in pack");
2168
2169 case '.' | TYPE_IS_SHRIEKING:
2170 case '.':
2171 if (howlen == e_star) from = start;
2172 else if (len == 0) from = cur;
2173 else {
2174 tempsym_t *group = symptr;
2175
2176 while (--len && group) group = group->previous;
2177 from = group ? start + group->strbeg : start;
2178 }
2179 fromstr = NEXTFROM;
2180 len = SvIV_no_inf(fromstr, datumtype);
2181 goto resize;
2182 case '@' | TYPE_IS_SHRIEKING:
2183 case '@':
2184 from = start + symptr->strbeg;
2185 resize:
2186 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2187 if (len >= 0) {
2188 while (len && from < cur) {
2189 from += UTF8SKIP(from);
2190 len--;
2191 }
2192 if (from > cur)
2193 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2194 if (len) {
2195 /* Here we know from == cur */
2196 grow:
2197 GROWING(0, cat, start, cur, len);
2198 Zero(cur, len, char);
2199 cur += len;
2200 } else if (from < cur) {
2201 len = cur - from;
2202 goto shrink;
2203 } else goto no_change;
2204 } else {
2205 cur = from;
2206 len = -len;
2207 goto utf8_shrink;
2208 }
2209 else {
2210 len -= cur - from;
2211 if (len > 0) goto grow;
2212 if (len == 0) goto no_change;
2213 len = -len;
2214 goto shrink;
2215 }
2216 break;
2217
2218 case '(': {
2219 tempsym_t savsym = *symptr;
2220 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2221 symptr->flags |= group_modifiers;
2222 symptr->patend = savsym.grpend;
2223 symptr->level++;
2224 symptr->previous = &lookahead;
2225 while (len--) {
2226 U32 was_utf8;
2227 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2228 else symptr->flags &= ~FLAG_PARSE_UTF8;
2229 was_utf8 = SvUTF8(cat);
2230 symptr->patptr = savsym.grpbeg;
2231 beglist = pack_rec(cat, symptr, beglist, endlist);
2232 if (SvUTF8(cat) != was_utf8)
2233 /* This had better be an upgrade while in utf8==0 mode */
2234 utf8 = 1;
2235
2236 if (savsym.howlen == e_star && beglist == endlist)
2237 break; /* No way to continue */
2238 }
2239 items = endlist - beglist;
2240 lookahead.flags = symptr->flags & ~group_modifiers;
2241 goto no_change;
2242 }
2243 case 'X' | TYPE_IS_SHRIEKING:
2244 if (!len) /* Avoid division by 0 */
2245 len = 1;
2246 if (utf8) {
2247 char *hop, *last;
2248 I32 l = len;
2249 hop = last = start;
2250 while (hop < cur) {
2251 hop += UTF8SKIP(hop);
2252 if (--l == 0) {
2253 last = hop;
2254 l = len;
2255 }
2256 }
2257 if (last > cur)
2258 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2259 cur = last;
2260 break;
2261 }
2262 len = (cur-start) % len;
2263 /* FALLTHROUGH */
2264 case 'X':
2265 if (utf8) {
2266 if (len < 1) goto no_change;
2267 utf8_shrink:
2268 while (len > 0) {
2269 if (cur <= start)
2270 Perl_croak(aTHX_ "'%c' outside of string in pack",
2271 (int) TYPE_NO_MODIFIERS(datumtype));
2272 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2273 if (cur <= start)
2274 Perl_croak(aTHX_ "'%c' outside of string in pack",
2275 (int) TYPE_NO_MODIFIERS(datumtype));
2276 }
2277 len--;
2278 }
2279 } else {
2280 shrink:
2281 if (cur - start < len)
2282 Perl_croak(aTHX_ "'%c' outside of string in pack",
2283 (int) TYPE_NO_MODIFIERS(datumtype));
2284 cur -= len;
2285 }
2286 if (cur < start+symptr->strbeg) {
2287 /* Make sure group starts don't point into the void */
2288 tempsym_t *group;
2289 const STRLEN length = cur-start;
2290 for (group = symptr;
2291 group && length < group->strbeg;
2292 group = group->previous) group->strbeg = length;
2293 lookahead.strbeg = length;
2294 }
2295 break;
2296 case 'x' | TYPE_IS_SHRIEKING: {
2297 I32 ai32;
2298 if (!len) /* Avoid division by 0 */
2299 len = 1;
2300 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2301 else ai32 = (cur - start) % len;
2302 if (ai32 == 0) goto no_change;
2303 len -= ai32;
2304 }
2305 /* FALLTHROUGH */
2306 case 'x':
2307 goto grow;
2308 case 'A':
2309 case 'Z':
2310 case 'a': {
2311 const char *aptr;
2312
2313 fromstr = NEXTFROM;
2314 aptr = SvPV_const(fromstr, fromlen);
2315 if (DO_UTF8(fromstr)) {
2316 const char *end, *s;
2317
2318 if (!utf8 && !SvUTF8(cat)) {
2319 marked_upgrade(aTHX_ cat, symptr);
2320 lookahead.flags |= FLAG_DO_UTF8;
2321 lookahead.strbeg = symptr->strbeg;
2322 utf8 = 1;
2323 start = SvPVX(cat);
2324 cur = start + SvCUR(cat);
2325 }
2326 if (howlen == e_star) {
2327 if (utf8) goto string_copy;
2328 len = fromlen+1;
2329 }
2330 s = aptr;
2331 end = aptr + fromlen;
2332 fromlen = datumtype == 'Z' ? len-1 : len;
2333 while ((I32) fromlen > 0 && s < end) {
2334 s += UTF8SKIP(s);
2335 fromlen--;
2336 }
2337 if (s > end)
2338 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2339 if (utf8) {
2340 len = fromlen;
2341 if (datumtype == 'Z') len++;
2342 fromlen = s-aptr;
2343 len += fromlen;
2344
2345 goto string_copy;
2346 }
2347 fromlen = len - fromlen;
2348 if (datumtype == 'Z') fromlen--;
2349 if (howlen == e_star) {
2350 len = fromlen;
2351 if (datumtype == 'Z') len++;
2352 }
2353 GROWING(0, cat, start, cur, len);
2354 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2355 datumtype | TYPE_IS_PACK))
2356 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2357 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2358 (int)datumtype, aptr, end, cur, (UV)fromlen);
2359 cur += fromlen;
2360 len -= fromlen;
2361 } else if (utf8) {
2362 if (howlen == e_star) {
2363 len = fromlen;
2364 if (datumtype == 'Z') len++;
2365 }
2366 if (len <= (I32) fromlen) {
2367 fromlen = len;
2368 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2369 }
2370 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2371 upgrade, so:
2372 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2373 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2374 len -= fromlen;
2375 while (fromlen > 0) {
2376 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2377 aptr++;
2378 fromlen--;
2379 }
2380 } else {
2381 string_copy:
2382 if (howlen == e_star) {
2383 len = fromlen;
2384 if (datumtype == 'Z') len++;
2385 }
2386 if (len <= (I32) fromlen) {
2387 fromlen = len;
2388 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2389 }
2390 GROWING(0, cat, start, cur, len);
2391 Copy(aptr, cur, fromlen, char);
2392 cur += fromlen;
2393 len -= fromlen;
2394 }
2395 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2396 cur += len;
2397 SvTAINT(cat);
2398 break;
2399 }
2400 case 'B':
2401 case 'b': {
2402 const char *str, *end;
2403 I32 l, field_len;
2404 U8 bits;
2405 bool utf8_source;
2406 U32 utf8_flags;
2407
2408 fromstr = NEXTFROM;
2409 str = SvPV_const(fromstr, fromlen);
2410 end = str + fromlen;
2411 if (DO_UTF8(fromstr)) {
2412 utf8_source = TRUE;
2413 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2414 } else {
2415 utf8_source = FALSE;
2416 utf8_flags = 0; /* Unused, but keep compilers happy */
2417 }
2418 if (howlen == e_star) len = fromlen;
2419 field_len = (len+7)/8;
2420 GROWING(utf8, cat, start, cur, field_len);
2421 if (len > (I32)fromlen) len = fromlen;
2422 bits = 0;
2423 l = 0;
2424 if (datumtype == 'B')
2425 while (l++ < len) {
2426 if (utf8_source) {
2427 UV val = 0;
2428 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2429 bits |= val & 1;
2430 } else bits |= *str++ & 1;
2431 if (l & 7) bits <<= 1;
2432 else {
2433 PUSH_BYTE(utf8, cur, bits);
2434 bits = 0;
2435 }
2436 }
2437 else
2438 /* datumtype == 'b' */
2439 while (l++ < len) {
2440 if (utf8_source) {
2441 UV val = 0;
2442 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2443 if (val & 1) bits |= 0x80;
2444 } else if (*str++ & 1)
2445 bits |= 0x80;
2446 if (l & 7) bits >>= 1;
2447 else {
2448 PUSH_BYTE(utf8, cur, bits);
2449 bits = 0;
2450 }
2451 }
2452 l--;
2453 if (l & 7) {
2454 if (datumtype == 'B')
2455 bits <<= 7 - (l & 7);
2456 else
2457 bits >>= 7 - (l & 7);
2458 PUSH_BYTE(utf8, cur, bits);
2459 l += 7;
2460 }
2461 /* Determine how many chars are left in the requested field */
2462 l /= 8;
2463 if (howlen == e_star) field_len = 0;
2464 else field_len -= l;
2465 Zero(cur, field_len, char);
2466 cur += field_len;
2467 break;
2468 }
2469 case 'H':
2470 case 'h': {
2471 const char *str, *end;
2472 I32 l, field_len;
2473 U8 bits;
2474 bool utf8_source;
2475 U32 utf8_flags;
2476
2477 fromstr = NEXTFROM;
2478 str = SvPV_const(fromstr, fromlen);
2479 end = str + fromlen;
2480 if (DO_UTF8(fromstr)) {
2481 utf8_source = TRUE;
2482 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2483 } else {
2484 utf8_source = FALSE;
2485 utf8_flags = 0; /* Unused, but keep compilers happy */
2486 }
2487 if (howlen == e_star) len = fromlen;
2488 field_len = (len+1)/2;
2489 GROWING(utf8, cat, start, cur, field_len);
2490 if (!utf8_source && len > (I32)fromlen) len = fromlen;
2491 bits = 0;
2492 l = 0;
2493 if (datumtype == 'H')
2494 while (l++ < len) {
2495 if (utf8_source) {
2496 UV val = 0;
2497 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2498 if (val < 256 && isALPHA(val))
2499 bits |= (val + 9) & 0xf;
2500 else
2501 bits |= val & 0xf;
2502 } else if (isALPHA(*str))
2503 bits |= (*str++ + 9) & 0xf;
2504 else
2505 bits |= *str++ & 0xf;
2506 if (l & 1) bits <<= 4;
2507 else {
2508 PUSH_BYTE(utf8, cur, bits);
2509 bits = 0;
2510 }
2511 }
2512 else
2513 while (l++ < len) {
2514 if (utf8_source) {
2515 UV val = 0;
2516 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2517 if (val < 256 && isALPHA(val))
2518 bits |= ((val + 9) & 0xf) << 4;
2519 else
2520 bits |= (val & 0xf) << 4;
2521 } else if (isALPHA(*str))
2522 bits |= ((*str++ + 9) & 0xf) << 4;
2523 else
2524 bits |= (*str++ & 0xf) << 4;
2525 if (l & 1) bits >>= 4;
2526 else {
2527 PUSH_BYTE(utf8, cur, bits);
2528 bits = 0;
2529 }
2530 }
2531 l--;
2532 if (l & 1) {
2533 PUSH_BYTE(utf8, cur, bits);
2534 l++;
2535 }
2536 /* Determine how many chars are left in the requested field */
2537 l /= 2;
2538 if (howlen == e_star) field_len = 0;
2539 else field_len -= l;
2540 Zero(cur, field_len, char);
2541 cur += field_len;
2542 break;
2543 }
2544 case 'c':
2545 while (len-- > 0) {
2546 IV aiv;
2547 fromstr = NEXTFROM;
2548 aiv = SvIV_no_inf(fromstr, datumtype);
2549 if ((-128 > aiv || aiv > 127))
2550 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2551 "Character in 'c' format wrapped in pack");
2552 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2553 }
2554 break;
2555 case 'C':
2556 if (len == 0) {
2557 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2558 break;
2559 }
2560 while (len-- > 0) {
2561 IV aiv;
2562 fromstr = NEXTFROM;
2563 aiv = SvIV_no_inf(fromstr, datumtype);
2564 if ((0 > aiv || aiv > 0xff))
2565 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2566 "Character in 'C' format wrapped in pack");
2567 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2568 }
2569 break;
2570 case 'W': {
2571 char *end;
2572 U8 in_bytes = (U8)IN_BYTES;
2573
2574 end = start+SvLEN(cat)-1;
2575 if (utf8) end -= UTF8_MAXLEN-1;
2576 while (len-- > 0) {
2577 UV auv;
2578 fromstr = NEXTFROM;
2579 auv = SvUV_no_inf(fromstr, datumtype);
2580 if (in_bytes) auv = auv % 0x100;
2581 if (utf8) {
2582 W_utf8:
2583 if (cur > end) {
2584 *cur = '\0';
2585 SvCUR_set(cat, cur - start);
2586
2587 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2588 end = start+SvLEN(cat)-UTF8_MAXLEN;
2589 }
2590 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2591 auv,
2592 warn_utf8 ?
2593 0 : UNICODE_ALLOW_ANY);
2594 } else {
2595 if (auv >= 0x100) {
2596 if (!SvUTF8(cat)) {
2597 *cur = '\0';
2598 SvCUR_set(cat, cur - start);
2599 marked_upgrade(aTHX_ cat, symptr);
2600 lookahead.flags |= FLAG_DO_UTF8;
2601 lookahead.strbeg = symptr->strbeg;
2602 utf8 = 1;
2603 start = SvPVX(cat);
2604 cur = start + SvCUR(cat);
2605 end = start+SvLEN(cat)-UTF8_MAXLEN;
2606 goto W_utf8;
2607 }
2608 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2609 "Character in 'W' format wrapped in pack");
2610 auv &= 0xff;
2611 }
2612 if (cur >= end) {
2613 *cur = '\0';
2614 SvCUR_set(cat, cur - start);
2615 GROWING(0, cat, start, cur, len+1);
2616 end = start+SvLEN(cat)-1;
2617 }
2618 *(U8 *) cur++ = (U8)auv;
2619 }
2620 }
2621 break;
2622 }
2623 case 'U': {
2624 char *end;
2625
2626 if (len == 0) {
2627 if (!(symptr->flags & FLAG_DO_UTF8)) {
2628 marked_upgrade(aTHX_ cat, symptr);
2629 lookahead.flags |= FLAG_DO_UTF8;
2630 lookahead.strbeg = symptr->strbeg;
2631 }
2632 utf8 = 0;
2633 goto no_change;
2634 }
2635
2636 end = start+SvLEN(cat);
2637 if (!utf8) end -= UTF8_MAXLEN;
2638 while (len-- > 0) {
2639 UV auv;
2640 fromstr = NEXTFROM;
2641 auv = SvUV_no_inf(fromstr, datumtype);
2642 if (utf8) {
2643 U8 buffer[UTF8_MAXLEN], *endb;
2644 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
2645 warn_utf8 ?
2646 0 : UNICODE_ALLOW_ANY);
2647 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2648 *cur = '\0';
2649 SvCUR_set(cat, cur - start);
2650 GROWING(0, cat, start, cur,
2651 len+(endb-buffer)*UTF8_EXPAND);
2652 end = start+SvLEN(cat);
2653 }
2654 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2655 } else {
2656 if (cur >= end) {
2657 *cur = '\0';
2658 SvCUR_set(cat, cur - start);
2659 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2660 end = start+SvLEN(cat)-UTF8_MAXLEN;
2661 }
2662 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
2663 warn_utf8 ?
2664 0 : UNICODE_ALLOW_ANY);
2665 }
2666 }
2667 break;
2668 }
2669 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2670 case 'f':
2671 while (len-- > 0) {
2672 float afloat;
2673 NV anv;
2674 fromstr = NEXTFROM;
2675 anv = SvNV(fromstr);
2676 # if defined(VMS) && !defined(_IEEE_FP)
2677 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2678 * on Alpha; fake it if we don't have them.
2679 */
2680 if (anv > FLT_MAX)
2681 afloat = FLT_MAX;
2682 else if (anv < -FLT_MAX)
2683 afloat = -FLT_MAX;
2684 else afloat = (float)anv;
2685 # else
2686 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2687 if(Perl_isnan(anv))
2688 afloat = (float)NV_NAN;
2689 else
2690 #endif
2691 /* a simple cast to float is undefined if outside
2692 * the range of values that can be represented */
2693 afloat = (float)(anv > FLT_MAX ? NV_INF :
2694 anv < -FLT_MAX ? -NV_INF : anv);
2695 # endif
2696 PUSH_VAR(utf8, cur, afloat, needs_swap);
2697 }
2698 break;
2699 case 'd':
2700 while (len-- > 0) {
2701 double adouble;
2702 NV anv;
2703 fromstr = NEXTFROM;
2704 anv = SvNV(fromstr);
2705 # if defined(VMS) && !defined(_IEEE_FP)
2706 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2707 * on Alpha; fake it if we don't have them.
2708 */
2709 if (anv > DBL_MAX)
2710 adouble = DBL_MAX;
2711 else if (anv < -DBL_MAX)
2712 adouble = -DBL_MAX;
2713 else adouble = (double)anv;
2714 # else
2715 adouble = (double)anv;
2716 # endif
2717 PUSH_VAR(utf8, cur, adouble, needs_swap);
2718 }
2719 break;
2720 case 'F': {
2721 NV_bytes anv;
2722 Zero(&anv, 1, NV); /* can be long double with unused bits */
2723 while (len-- > 0) {
2724 fromstr = NEXTFROM;
2725 #ifdef __GNUC__
2726 /* to work round a gcc/x86 bug; don't use SvNV */
2727 anv.nv = sv_2nv(fromstr);
2728 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2729 && LONG_DOUBLESIZE > 10
2730 /* GCC sometimes overwrites the padding in the
2731 assignment above */
2732 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2733 # endif
2734 #else
2735 anv.nv = SvNV(fromstr);
2736 #endif
2737 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2738 }
2739 break;
2740 }
2741 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2742 case 'D': {
2743 ld_bytes aldouble;
2744 /* long doubles can have unused bits, which may be nonzero */
2745 Zero(&aldouble, 1, long double);
2746 while (len-- > 0) {
2747 fromstr = NEXTFROM;
2748 # ifdef __GNUC__
2749 /* to work round a gcc/x86 bug; don't use SvNV */
2750 aldouble.ld = (long double)sv_2nv(fromstr);
2751 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2752 /* GCC sometimes overwrites the padding in the
2753 assignment above */
2754 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2755 # endif
2756 # else
2757 aldouble.ld = (long double)SvNV(fromstr);
2758 # endif
2759 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2760 needs_swap);
2761 }
2762 break;
2763 }
2764 #endif
2765 case 'n' | TYPE_IS_SHRIEKING:
2766 case 'n':
2767 while (len-- > 0) {
2768 I16 ai16;
2769 fromstr = NEXTFROM;
2770 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2771 ai16 = PerlSock_htons(ai16);
2772 PUSH16(utf8, cur, &ai16, FALSE);
2773 }
2774 break;
2775 case 'v' | TYPE_IS_SHRIEKING:
2776 case 'v':
2777 while (len-- > 0) {
2778 I16 ai16;
2779 fromstr = NEXTFROM;
2780 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2781 ai16 = htovs(ai16);
2782 PUSH16(utf8, cur, &ai16, FALSE);
2783 }
2784 break;
2785 case 'S' | TYPE_IS_SHRIEKING:
2786 #if SHORTSIZE != SIZE16
2787 while (len-- > 0) {
2788 unsigned short aushort;
2789 fromstr = NEXTFROM;
2790 aushort = SvUV_no_inf(fromstr, datumtype);
2791 PUSH_VAR(utf8, cur, aushort, needs_swap);
2792 }
2793 break;
2794 #else
2795 /* FALLTHROUGH */
2796 #endif
2797 case 'S':
2798 while (len-- > 0) {
2799 U16 au16;
2800 fromstr = NEXTFROM;
2801 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2802 PUSH16(utf8, cur, &au16, needs_swap);
2803 }
2804 break;
2805 case 's' | TYPE_IS_SHRIEKING:
2806 #if SHORTSIZE != SIZE16
2807 while (len-- > 0) {
2808 short ashort;
2809 fromstr = NEXTFROM;
2810 ashort = SvIV_no_inf(fromstr, datumtype);
2811 PUSH_VAR(utf8, cur, ashort, needs_swap);
2812 }
2813 break;
2814 #else
2815 /* FALLTHROUGH */
2816 #endif
2817 case 's':
2818 while (len-- > 0) {
2819 I16 ai16;
2820 fromstr = NEXTFROM;
2821 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2822 PUSH16(utf8, cur, &ai16, needs_swap);
2823 }
2824 break;
2825 case 'I':
2826 case 'I' | TYPE_IS_SHRIEKING:
2827 while (len-- > 0) {
2828 unsigned int auint;
2829 fromstr = NEXTFROM;
2830 auint = SvUV_no_inf(fromstr, datumtype);
2831 PUSH_VAR(utf8, cur, auint, needs_swap);
2832 }
2833 break;
2834 case 'j':
2835 while (len-- > 0) {
2836 IV aiv;
2837 fromstr = NEXTFROM;
2838 aiv = SvIV_no_inf(fromstr, datumtype);
2839 PUSH_VAR(utf8, cur, aiv, needs_swap);
2840 }
2841 break;
2842 case 'J':
2843 while (len-- > 0) {
2844 UV auv;
2845 fromstr = NEXTFROM;
2846 auv = SvUV_no_inf(fromstr, datumtype);
2847 PUSH_VAR(utf8, cur, auv, needs_swap);
2848 }
2849 break;
2850 case 'w':
2851 while (len-- > 0) {
2852 NV anv;
2853 fromstr = NEXTFROM;
2854 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2855 anv = SvNV_nomg(fromstr);
2856
2857 if (anv < 0) {
2858 *cur = '\0';
2859 SvCUR_set(cat, cur - start);
2860 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2861 }
2862
2863 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2864 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2865 any negative IVs will have already been got by the croak()
2866 above. IOK is untrue for fractions, so we test them
2867 against UV_MAX_P1. */
2868 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2869 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2870 char *in = buf + sizeof(buf);
2871 UV auv = SvUV_nomg(fromstr);
2872
2873 do {
2874 *--in = (char)((auv & 0x7f) | 0x80);
2875 auv >>= 7;
2876 } while (auv);
2877 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2878 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2879 in, (buf + sizeof(buf)) - in);
2880 } else if (SvPOKp(fromstr))
2881 goto w_string;
2882 else if (SvNOKp(fromstr)) {
2883 /* 10**NV_MAX_10_EXP is the largest power of 10
2884 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2885 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2886 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2887 And with that many bytes only Inf can overflow.
2888 Some C compilers are strict about integral constant
2889 expressions so we conservatively divide by a slightly
2890 smaller integer instead of multiplying by the exact
2891 floating-point value.
2892 */
2893 #ifdef NV_MAX_10_EXP
2894 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2895 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2896 #else
2897 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2898 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2899 #endif
2900 char *in = buf + sizeof(buf);
2901
2902 anv = Perl_floor(anv);
2903 do {
2904 const NV next = Perl_floor(anv / 128);
2905 if (in <= buf) /* this cannot happen ;-) */
2906 Perl_croak(aTHX_ "Cannot compress integer in pack");
2907 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2908 anv = next;
2909 } while (anv > 0);
2910 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2911 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2912 in, (buf + sizeof(buf)) - in);
2913 } else {
2914 const char *from;
2915 char *result, *in;
2916 SV *norm;
2917 STRLEN len;
2918 bool done;
2919
2920 w_string:
2921 /* Copy string and check for compliance */
2922 from = SvPV_nomg_const(fromstr, len);
2923 if ((norm = is_an_int(from, len)) == NULL)
2924 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2925
2926 Newx(result, len, char);
2927 in = result + len;
2928 done = FALSE;
2929 while (!done) *--in = div128(norm, &done) | 0x80;
2930 result[len - 1] &= 0x7F; /* clear continue bit */
2931 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2932 in, (result + len) - in);
2933 Safefree(result);
2934 SvREFCNT_dec(norm); /* free norm */
2935 }
2936 }
2937 break;
2938 case 'i':
2939 case 'i' | TYPE_IS_SHRIEKING:
2940 while (len-- > 0) {
2941 int aint;
2942 fromstr = NEXTFROM;
2943 aint = SvIV_no_inf(fromstr, datumtype);
2944 PUSH_VAR(utf8, cur, aint, needs_swap);
2945 }
2946 break;
2947 case 'N' | TYPE_IS_SHRIEKING:
2948 case 'N':
2949 while (len-- > 0) {
2950 U32 au32;
2951 fromstr = NEXTFROM;
2952 au32 = SvUV_no_inf(fromstr, datumtype);
2953 au32 = PerlSock_htonl(au32);
2954 PUSH32(utf8, cur, &au32, FALSE);
2955 }
2956 break;
2957 case 'V' | TYPE_IS_SHRIEKING:
2958 case 'V':
2959 while (len-- > 0) {
2960 U32 au32;
2961 fromstr = NEXTFROM;
2962 au32 = SvUV_no_inf(fromstr, datumtype);
2963 au32 = htovl(au32);
2964 PUSH32(utf8, cur, &au32, FALSE);
2965 }
2966 break;
2967 case 'L' | TYPE_IS_SHRIEKING:
2968 #if LONGSIZE != SIZE32
2969 while (len-- > 0) {
2970 unsigned long aulong;
2971 fromstr = NEXTFROM;
2972 aulong = SvUV_no_inf(fromstr, datumtype);
2973 PUSH_VAR(utf8, cur, aulong, needs_swap);
2974 }
2975 break;
2976 #else
2977 /* Fall though! */
2978 #endif
2979 case 'L':
2980 while (len-- > 0) {
2981 U32 au32;
2982 fromstr = NEXTFROM;
2983 au32 = SvUV_no_inf(fromstr, datumtype);
2984 PUSH32(utf8, cur, &au32, needs_swap);
2985 }
2986 break;
2987 case 'l' | TYPE_IS_SHRIEKING:
2988 #if LONGSIZE != SIZE32
2989 while (len-- > 0) {
2990 long along;
2991 fromstr = NEXTFROM;
2992 along = SvIV_no_inf(fromstr, datumtype);
2993 PUSH_VAR(utf8, cur, along, needs_swap);
2994 }
2995 break;
2996 #else
2997 /* Fall though! */
2998 #endif
2999 case 'l':
3000 while (len-- > 0) {
3001 I32 ai32;
3002 fromstr = NEXTFROM;
3003 ai32 = SvIV_no_inf(fromstr, datumtype);
3004 PUSH32(utf8, cur, &ai32, needs_swap);
3005 }
3006 break;
3007 #if defined(HAS_QUAD) && IVSIZE >= 8
3008 case 'Q':
3009 while (len-- > 0) {
3010 Uquad_t auquad;
3011 fromstr = NEXTFROM;
3012 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3013 PUSH_VAR(utf8, cur, auquad, needs_swap);
3014 }
3015 break;
3016 case 'q':
3017 while (len-- > 0) {
3018 Quad_t aquad;
3019 fromstr = NEXTFROM;
3020 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3021 PUSH_VAR(utf8, cur, aquad, needs_swap);
3022 }
3023 break;
3024 #endif
3025 case 'P':
3026 len = 1; /* assume SV is correct length */
3027 GROWING(utf8, cat, start, cur, sizeof(char *));
3028 /* FALLTHROUGH */
3029 case 'p':
3030 while (len-- > 0) {
3031 const char *aptr;
3032
3033 fromstr = NEXTFROM;
3034 SvGETMAGIC(fromstr);
3035 if (!SvOK(fromstr)) aptr = NULL;
3036 else {
3037 /* XXX better yet, could spirit away the string to
3038 * a safe spot and hang on to it until the result
3039 * of pack() (and all copies of the result) are
3040 * gone.
3041 */
3042 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3043 !SvREADONLY(fromstr)))) {
3044 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3045 "Attempt to pack pointer to temporary value");
3046 }
3047 if (SvPOK(fromstr) || SvNIOK(fromstr))
3048 aptr = SvPV_nomg_const_nolen(fromstr);
3049 else
3050 aptr = SvPV_force_flags_nolen(fromstr, 0);
3051 }
3052 PUSH_VAR(utf8, cur, aptr, needs_swap);
3053 }
3054 break;
3055 case 'u': {
3056 const char *aptr, *aend;
3057 bool from_utf8;
3058
3059 fromstr = NEXTFROM;
3060 if (len <= 2) len = 45;
3061 else len = len / 3 * 3;
3062 if (len >= 64) {
3063 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3064 "Field too wide in 'u' format in pack");
3065 len = 63;
3066 }
3067 aptr = SvPV_const(fromstr, fromlen);
3068 from_utf8 = DO_UTF8(fromstr);
3069 if (from_utf8) {
3070 aend = aptr + fromlen;
3071 fromlen = sv_len_utf8_nomg(fromstr);
3072 } else aend = NULL; /* Unused, but keep compilers happy */
3073 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3074 while (fromlen > 0) {
3075 U8 *end;
3076 I32 todo;
3077 U8 hunk[1+63/3*4+1];
3078
3079 if ((I32)fromlen > len)
3080 todo = len;
3081 else
3082 todo = fromlen;
3083 if (from_utf8) {
3084 char buffer[64];
3085 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3086 'u' | TYPE_IS_PACK)) {
3087 *cur = '\0';
3088 SvCUR_set(cat, cur - start);
3089 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3090 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3091 aptr, aend, buffer, (long) todo);
3092 }
3093 end = doencodes(hunk, (const U8 *)buffer, todo);
3094 } else {
3095 end = doencodes(hunk, (const U8 *)aptr, todo);
3096 aptr += todo;
3097 }
3098 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3099 fromlen -= todo;
3100 }
3101 break;
3102 }
3103 }
3104 *cur = '\0';
3105 SvCUR_set(cat, cur - start);
3106 no_change:
3107 *symptr = lookahead;
3108 }
3109 return beglist;
3110 }
3111 #undef NEXTFROM
3112
3113
3114 PP(pp_pack)
3115 {
3116 dSP; dMARK; dORIGMARK; dTARGET;
3117 SV *cat = TARG;
3118 STRLEN fromlen;
3119 SV *pat_sv = *++MARK;
3120 const char *pat = SvPV_const(pat_sv, fromlen);
3121 const char *patend = pat + fromlen;
3122
3123 MARK++;
3124 sv_setpvs(cat, "");
3125 SvUTF8_off(cat);
3126
3127 packlist(cat, pat, patend, MARK, SP + 1);
3128
3129 SvSETMAGIC(cat);
3130 SP = ORIGMARK;
3131 PUSHs(cat);
3132 RETURN;
3133 }
3134
3135 /*
3136 * ex: set ts=8 sts=4 sw=4 et:
3137 */