printf-parse.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
/* Formatted output to strings.
   Copyright (C) 1999-2000, 2002-2003, 2006-2008 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
/* This file can be parametrized with the following macros:
     CHAR_T             The element type of the format string.
     CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
                        in the format string are ASCII.
     DIRECTIVE          Structure denoting a format directive.
                        Depends on CHAR_T.
     DIRECTIVES         Structure denoting the set of format directives of a
                        format string.  Depends on CHAR_T.
     PRINTF_PARSE       Function that parses a format string.
                        Depends on CHAR_T.
     STATIC             Set to 'static' to declare the function static.
     ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
  26. #ifndef PRINTF_PARSE
  27. # include <config.h>
  28. #endif
  29. /* Specification. */
  30. #ifndef PRINTF_PARSE
  31. # include "printf-parse.h"
  32. #endif
  33. /* Default parameters. */
  34. #ifndef PRINTF_PARSE
  35. # define PRINTF_PARSE printf_parse
  36. # define CHAR_T char
  37. # define DIRECTIVE char_directive
  38. # define DIRECTIVES char_directives
  39. #endif
  40. /* Get size_t, NULL. */
  41. #include <stddef.h>
  42. /* Get intmax_t. */
  43. #if defined IN_LIBINTL || defined IN_LIBASPRINTF
  44. # if HAVE_STDINT_H_WITH_UINTMAX
  45. # include <stdint.h>
  46. # endif
  47. # if HAVE_INTTYPES_H_WITH_UINTMAX
  48. # include <inttypes.h>
  49. # endif
  50. #else
  51. # include <stdint.h>
  52. #endif
  53. /* malloc(), realloc(), free(). */
  54. #include <stdlib.h>
  55. /* errno. */
  56. #include <errno.h>
  57. /* Checked size_t computations. */
  58. #include "xsize.h"
  59. #if CHAR_T_ONLY_ASCII
  60. /* c_isascii(). */
  61. # include "c-ctype.h"
  62. #endif
  63. #ifdef STATIC
  64. STATIC
  65. #endif
  66. int
  67. PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
  68. {
  69. const CHAR_T *cp = format; /* pointer into format */
  70. size_t arg_posn = 0; /* number of regular arguments consumed */
  71. size_t d_allocated; /* allocated elements of d->dir */
  72. size_t a_allocated; /* allocated elements of a->arg */
  73. size_t max_width_length = 0;
  74. size_t max_precision_length = 0;
  75. d->count = 0;
  76. d_allocated = 1;
  77. d->dir = (DIRECTIVE *) malloc (d_allocated * sizeof (DIRECTIVE));
  78. if (d->dir == NULL)
  79. /* Out of memory. */
  80. goto out_of_memory_1;
  81. a->count = 0;
  82. a_allocated = 0;
  83. a->arg = NULL;
  84. #define REGISTER_ARG(_index_,_type_) \
  85. { \
  86. size_t n = (_index_); \
  87. if (n >= a_allocated) \
  88. { \
  89. size_t memory_size; \
  90. argument *memory; \
  91. \
  92. a_allocated = xtimes (a_allocated, 2); \
  93. if (a_allocated <= n) \
  94. a_allocated = xsum (n, 1); \
  95. memory_size = xtimes (a_allocated, sizeof (argument)); \
  96. if (size_overflow_p (memory_size)) \
  97. /* Overflow, would lead to out of memory. */ \
  98. goto out_of_memory; \
  99. memory = (argument *) (a->arg \
  100. ? realloc (a->arg, memory_size) \
  101. : malloc (memory_size)); \
  102. if (memory == NULL) \
  103. /* Out of memory. */ \
  104. goto out_of_memory; \
  105. a->arg = memory; \
  106. } \
  107. while (a->count <= n) \
  108. a->arg[a->count++].type = TYPE_NONE; \
  109. if (a->arg[n].type == TYPE_NONE) \
  110. a->arg[n].type = (_type_); \
  111. else if (a->arg[n].type != (_type_)) \
  112. /* Ambiguous type for positional argument. */ \
  113. goto error; \
  114. }
  115. while (*cp != '\0')
  116. {
  117. CHAR_T c = *cp++;
  118. if (c == '%')
  119. {
  120. size_t arg_index = ARG_NONE;
  121. DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
  122. /* Initialize the next directive. */
  123. dp->dir_start = cp - 1;
  124. dp->flags = 0;
  125. dp->width_start = NULL;
  126. dp->width_end = NULL;
  127. dp->width_arg_index = ARG_NONE;
  128. dp->precision_start = NULL;
  129. dp->precision_end = NULL;
  130. dp->precision_arg_index = ARG_NONE;
  131. dp->arg_index = ARG_NONE;
  132. /* Test for positional argument. */
  133. if (*cp >= '0' && *cp <= '9')
  134. {
  135. const CHAR_T *np;
  136. for (np = cp; *np >= '0' && *np <= '9'; np++)
  137. ;
  138. if (*np == '$')
  139. {
  140. size_t n = 0;
  141. for (np = cp; *np >= '0' && *np <= '9'; np++)
  142. n = xsum (xtimes (n, 10), *np - '0');
  143. if (n == 0)
  144. /* Positional argument 0. */
  145. goto error;
  146. if (size_overflow_p (n))
  147. /* n too large, would lead to out of memory later. */
  148. goto error;
  149. arg_index = n - 1;
  150. cp = np + 1;
  151. }
  152. }
  153. /* Read the flags. */
  154. for (;;)
  155. {
  156. if (*cp == '\'')
  157. {
  158. dp->flags |= FLAG_GROUP;
  159. cp++;
  160. }
  161. else if (*cp == '-')
  162. {
  163. dp->flags |= FLAG_LEFT;
  164. cp++;
  165. }
  166. else if (*cp == '+')
  167. {
  168. dp->flags |= FLAG_SHOWSIGN;
  169. cp++;
  170. }
  171. else if (*cp == ' ')
  172. {
  173. dp->flags |= FLAG_SPACE;
  174. cp++;
  175. }
  176. else if (*cp == '#')
  177. {
  178. dp->flags |= FLAG_ALT;
  179. cp++;
  180. }
  181. else if (*cp == '0')
  182. {
  183. dp->flags |= FLAG_ZERO;
  184. cp++;
  185. }
  186. else
  187. break;
  188. }
  189. /* Parse the field width. */
  190. if (*cp == '*')
  191. {
  192. dp->width_start = cp;
  193. cp++;
  194. dp->width_end = cp;
  195. if (max_width_length < 1)
  196. max_width_length = 1;
  197. /* Test for positional argument. */
  198. if (*cp >= '0' && *cp <= '9')
  199. {
  200. const CHAR_T *np;
  201. for (np = cp; *np >= '0' && *np <= '9'; np++)
  202. ;
  203. if (*np == '$')
  204. {
  205. size_t n = 0;
  206. for (np = cp; *np >= '0' && *np <= '9'; np++)
  207. n = xsum (xtimes (n, 10), *np - '0');
  208. if (n == 0)
  209. /* Positional argument 0. */
  210. goto error;
  211. if (size_overflow_p (n))
  212. /* n too large, would lead to out of memory later. */
  213. goto error;
  214. dp->width_arg_index = n - 1;
  215. cp = np + 1;
  216. }
  217. }
  218. if (dp->width_arg_index == ARG_NONE)
  219. {
  220. dp->width_arg_index = arg_posn++;
  221. if (dp->width_arg_index == ARG_NONE)
  222. /* arg_posn wrapped around. */
  223. goto error;
  224. }
  225. REGISTER_ARG (dp->width_arg_index, TYPE_INT);
  226. }
  227. else if (*cp >= '0' && *cp <= '9')
  228. {
  229. size_t width_length;
  230. dp->width_start = cp;
  231. for (; *cp >= '0' && *cp <= '9'; cp++)
  232. ;
  233. dp->width_end = cp;
  234. width_length = dp->width_end - dp->width_start;
  235. if (max_width_length < width_length)
  236. max_width_length = width_length;
  237. }
  238. /* Parse the precision. */
  239. if (*cp == '.')
  240. {
  241. cp++;
  242. if (*cp == '*')
  243. {
  244. dp->precision_start = cp - 1;
  245. cp++;
  246. dp->precision_end = cp;
  247. if (max_precision_length < 2)
  248. max_precision_length = 2;
  249. /* Test for positional argument. */
  250. if (*cp >= '0' && *cp <= '9')
  251. {
  252. const CHAR_T *np;
  253. for (np = cp; *np >= '0' && *np <= '9'; np++)
  254. ;
  255. if (*np == '$')
  256. {
  257. size_t n = 0;
  258. for (np = cp; *np >= '0' && *np <= '9'; np++)
  259. n = xsum (xtimes (n, 10), *np - '0');
  260. if (n == 0)
  261. /* Positional argument 0. */
  262. goto error;
  263. if (size_overflow_p (n))
  264. /* n too large, would lead to out of memory
  265. later. */
  266. goto error;
  267. dp->precision_arg_index = n - 1;
  268. cp = np + 1;
  269. }
  270. }
  271. if (dp->precision_arg_index == ARG_NONE)
  272. {
  273. dp->precision_arg_index = arg_posn++;
  274. if (dp->precision_arg_index == ARG_NONE)
  275. /* arg_posn wrapped around. */
  276. goto error;
  277. }
  278. REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
  279. }
  280. else
  281. {
  282. size_t precision_length;
  283. dp->precision_start = cp - 1;
  284. for (; *cp >= '0' && *cp <= '9'; cp++)
  285. ;
  286. dp->precision_end = cp;
  287. precision_length = dp->precision_end - dp->precision_start;
  288. if (max_precision_length < precision_length)
  289. max_precision_length = precision_length;
  290. }
  291. }
  292. {
  293. arg_type type;
  294. /* Parse argument type/size specifiers. */
  295. {
  296. int flags = 0;
  297. for (;;)
  298. {
  299. if (*cp == 'h')
  300. {
  301. flags |= (1 << (flags & 1));
  302. cp++;
  303. }
  304. else if (*cp == 'L')
  305. {
  306. flags |= 4;
  307. cp++;
  308. }
  309. else if (*cp == 'l')
  310. {
  311. flags += 8;
  312. cp++;
  313. }
  314. else if (*cp == 'j')
  315. {
  316. if (sizeof (intmax_t) > sizeof (long))
  317. {
  318. /* intmax_t = long long */
  319. flags += 16;
  320. }
  321. else if (sizeof (intmax_t) > sizeof (int))
  322. {
  323. /* intmax_t = long */
  324. flags += 8;
  325. }
  326. cp++;
  327. }
  328. else if (*cp == 'z' || *cp == 'Z')
  329. {
  330. /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
  331. because the warning facility in gcc-2.95.2 understands
  332. only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
  333. if (sizeof (size_t) > sizeof (long))
  334. {
  335. /* size_t = long long */
  336. flags += 16;
  337. }
  338. else if (sizeof (size_t) > sizeof (int))
  339. {
  340. /* size_t = long */
  341. flags += 8;
  342. }
  343. cp++;
  344. }
  345. else if (*cp == 't')
  346. {
  347. if (sizeof (ptrdiff_t) > sizeof (long))
  348. {
  349. /* ptrdiff_t = long long */
  350. flags += 16;
  351. }
  352. else if (sizeof (ptrdiff_t) > sizeof (int))
  353. {
  354. /* ptrdiff_t = long */
  355. flags += 8;
  356. }
  357. cp++;
  358. }
  359. #if defined __APPLE__ && defined __MACH__
  360. /* On MacOS X 10.3, PRIdMAX is defined as "qd".
  361. We cannot change it to "lld" because PRIdMAX must also
  362. be understood by the system's printf routines. */
  363. else if (*cp == 'q')
  364. {
  365. if (64 / 8 > sizeof (long))
  366. {
  367. /* int64_t = long long */
  368. flags += 16;
  369. }
  370. else
  371. {
  372. /* int64_t = long */
  373. flags += 8;
  374. }
  375. cp++;
  376. }
  377. #endif
  378. #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
  379. /* On native Win32, PRIdMAX is defined as "I64d".
  380. We cannot change it to "lld" because PRIdMAX must also
  381. be understood by the system's printf routines. */
  382. else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
  383. {
  384. if (64 / 8 > sizeof (long))
  385. {
  386. /* __int64 = long long */
  387. flags += 16;
  388. }
  389. else
  390. {
  391. /* __int64 = long */
  392. flags += 8;
  393. }
  394. cp += 3;
  395. }
  396. #endif
  397. else
  398. break;
  399. }
  400. /* Read the conversion character. */
  401. c = *cp++;
  402. switch (c)
  403. {
  404. case 'd': case 'i':
  405. #if HAVE_LONG_LONG_INT
  406. /* If 'long long' exists and is larger than 'long': */
  407. if (flags >= 16 || (flags & 4))
  408. type = TYPE_LONGLONGINT;
  409. else
  410. #endif
  411. /* If 'long long' exists and is the same as 'long', we parse
  412. "lld" into TYPE_LONGINT. */
  413. if (flags >= 8)
  414. type = TYPE_LONGINT;
  415. else if (flags & 2)
  416. type = TYPE_SCHAR;
  417. else if (flags & 1)
  418. type = TYPE_SHORT;
  419. else
  420. type = TYPE_INT;
  421. break;
  422. case 'o': case 'u': case 'x': case 'X':
  423. #if HAVE_LONG_LONG_INT
  424. /* If 'long long' exists and is larger than 'long': */
  425. if (flags >= 16 || (flags & 4))
  426. type = TYPE_ULONGLONGINT;
  427. else
  428. #endif
  429. /* If 'unsigned long long' exists and is the same as
  430. 'unsigned long', we parse "llu" into TYPE_ULONGINT. */
  431. if (flags >= 8)
  432. type = TYPE_ULONGINT;
  433. else if (flags & 2)
  434. type = TYPE_UCHAR;
  435. else if (flags & 1)
  436. type = TYPE_USHORT;
  437. else
  438. type = TYPE_UINT;
  439. break;
  440. case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
  441. case 'a': case 'A':
  442. if (flags >= 16 || (flags & 4))
  443. type = TYPE_LONGDOUBLE;
  444. else
  445. type = TYPE_DOUBLE;
  446. break;
  447. case 'c':
  448. if (flags >= 8)
  449. #if HAVE_WINT_T
  450. type = TYPE_WIDE_CHAR;
  451. #else
  452. goto error;
  453. #endif
  454. else
  455. type = TYPE_CHAR;
  456. break;
  457. #if HAVE_WINT_T
  458. case 'C':
  459. type = TYPE_WIDE_CHAR;
  460. c = 'c';
  461. break;
  462. #endif
  463. case 's':
  464. if (flags >= 8)
  465. #if HAVE_WCHAR_T
  466. type = TYPE_WIDE_STRING;
  467. #else
  468. goto error;
  469. #endif
  470. else
  471. type = TYPE_STRING;
  472. break;
  473. #if HAVE_WCHAR_T
  474. case 'S':
  475. type = TYPE_WIDE_STRING;
  476. c = 's';
  477. break;
  478. #endif
  479. case 'p':
  480. type = TYPE_POINTER;
  481. break;
  482. case 'n':
  483. #if HAVE_LONG_LONG_INT
  484. /* If 'long long' exists and is larger than 'long': */
  485. if (flags >= 16 || (flags & 4))
  486. type = TYPE_COUNT_LONGLONGINT_POINTER;
  487. else
  488. #endif
  489. /* If 'long long' exists and is the same as 'long', we parse
  490. "lln" into TYPE_COUNT_LONGINT_POINTER. */
  491. if (flags >= 8)
  492. type = TYPE_COUNT_LONGINT_POINTER;
  493. else if (flags & 2)
  494. type = TYPE_COUNT_SCHAR_POINTER;
  495. else if (flags & 1)
  496. type = TYPE_COUNT_SHORT_POINTER;
  497. else
  498. type = TYPE_COUNT_INT_POINTER;
  499. break;
  500. #if ENABLE_UNISTDIO
  501. /* The unistdio extensions. */
  502. case 'U':
  503. if (flags >= 16)
  504. type = TYPE_U32_STRING;
  505. else if (flags >= 8)
  506. type = TYPE_U16_STRING;
  507. else
  508. type = TYPE_U8_STRING;
  509. break;
  510. #endif
  511. case '%':
  512. type = TYPE_NONE;
  513. break;
  514. default:
  515. /* Unknown conversion character. */
  516. goto error;
  517. }
  518. }
  519. if (type != TYPE_NONE)
  520. {
  521. dp->arg_index = arg_index;
  522. if (dp->arg_index == ARG_NONE)
  523. {
  524. dp->arg_index = arg_posn++;
  525. if (dp->arg_index == ARG_NONE)
  526. /* arg_posn wrapped around. */
  527. goto error;
  528. }
  529. REGISTER_ARG (dp->arg_index, type);
  530. }
  531. dp->conversion = c;
  532. dp->dir_end = cp;
  533. }
  534. d->count++;
  535. if (d->count >= d_allocated)
  536. {
  537. size_t memory_size;
  538. DIRECTIVE *memory;
  539. d_allocated = xtimes (d_allocated, 2);
  540. memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
  541. if (size_overflow_p (memory_size))
  542. /* Overflow, would lead to out of memory. */
  543. goto out_of_memory;
  544. memory = (DIRECTIVE *) realloc (d->dir, memory_size);
  545. if (memory == NULL)
  546. /* Out of memory. */
  547. goto out_of_memory;
  548. d->dir = memory;
  549. }
  550. }
  551. #if CHAR_T_ONLY_ASCII
  552. else if (!c_isascii (c))
  553. {
  554. /* Non-ASCII character. Not supported. */
  555. goto error;
  556. }
  557. #endif
  558. }
  559. d->dir[d->count].dir_start = cp;
  560. d->max_width_length = max_width_length;
  561. d->max_precision_length = max_precision_length;
  562. return 0;
  563. error:
  564. if (a->arg)
  565. free (a->arg);
  566. if (d->dir)
  567. free (d->dir);
  568. errno = EINVAL;
  569. return -1;
  570. out_of_memory:
  571. if (a->arg)
  572. free (a->arg);
  573. if (d->dir)
  574. free (d->dir);
  575. out_of_memory_1:
  576. errno = ENOMEM;
  577. return -1;
  578. }
  579. #undef PRINTF_PARSE
  580. #undef DIRECTIVES
  581. #undef DIRECTIVE
  582. #undef CHAR_T_ONLY_ASCII
  583. #undef CHAR_T