| pcre2api.3 | pcre2api.3 | |||
|---|---|---|---|---|
| skipping to change at line 101 | skipping to change at line 101 | |||
| int pcre2_set_newline(pcre2_compile_context *ccontext, | int pcre2_set_newline(pcre2_compile_context *ccontext, | |||
| uint32_t value); | uint32_t value); | |||
| int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, | int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, | |||
| uint32_t value); | uint32_t value); | |||
| int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, | int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, | |||
| int (*guard_function)(uint32_t, void *), void *user_data); | int (*guard_function)(uint32_t, void *), void *user_data); | |||
| int pcre2_set_optimize(pcre2_compile_context *ccontext, | ||||
| uint32_t directive); | ||||
| PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS | PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS | |||
| pcre2_match_context *pcre2_match_context_create( | pcre2_match_context *pcre2_match_context_create( | |||
| pcre2_general_context *gcontext); | pcre2_general_context *gcontext); | |||
| pcre2_match_context *pcre2_match_context_copy( | pcre2_match_context *pcre2_match_context_copy( | |||
| pcre2_match_context *mcontext); | pcre2_match_context *mcontext); | |||
| void pcre2_match_context_free(pcre2_match_context *mcontext); | void pcre2_match_context_free(pcre2_match_context *mcontext); | |||
| int pcre2_set_callout(pcre2_match_context *mcontext, | int pcre2_set_callout(pcre2_match_context *mcontext, | |||
| int (*callout_function)(pcre2_callout_block *, void *), | int (*callout_function)(pcre2_callout_block *, void *), | |||
| void *callout_data); | void *callout_data); | |||
| int pcre2_set_substitute_callout(pcre2_match_context *mcontext, | int pcre2_set_substitute_callout(pcre2_match_context *mcontext, | |||
| int (*callout_function)(pcre2_substitute_callout_block *, void *), | int (*callout_function)(pcre2_substitute_callout_block *, void *), | |||
| void *callout_data); | void *callout_data); | |||
| int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, | ||||
| PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, | ||||
| PCRE2_UCHAR *, PCRE2_SIZE, | ||||
| int, void *), | ||||
| void *callout_data); | ||||
| int pcre2_set_offset_limit(pcre2_match_context *mcontext, | int pcre2_set_offset_limit(pcre2_match_context *mcontext, | |||
| PCRE2_SIZE value); | PCRE2_SIZE value); | |||
| int pcre2_set_heap_limit(pcre2_match_context *mcontext, | int pcre2_set_heap_limit(pcre2_match_context *mcontext, | |||
| uint32_t value); | uint32_t value); | |||
| int pcre2_set_match_limit(pcre2_match_context *mcontext, | int pcre2_set_match_limit(pcre2_match_context *mcontext, | |||
| uint32_t value); | uint32_t value); | |||
| int pcre2_set_depth_limit(pcre2_match_context *mcontext, | int pcre2_set_depth_limit(pcre2_match_context *mcontext, | |||
| skipping to change at line 586 | skipping to change at line 595 | |||
| A compile context is required if you want to provide an external function for stack checking | A compile context is required if you want to provide an external function for stack checking | |||
| during compilation or to change the default values of any of the following compile-time parame‐ | during compilation or to change the default values of any of the following compile-time parame‐ | |||
| ters: | ters: | |||
| What \R matches (Unicode newlines or CR, LF, CRLF only) | What \R matches (Unicode newlines or CR, LF, CRLF only) | |||
| PCRE2's character tables | PCRE2's character tables | |||
| The newline character sequence | The newline character sequence | |||
| The compile time nested parentheses limit | The compile time nested parentheses limit | |||
| The maximum length of the pattern string | The maximum length of the pattern string | |||
| The extra options bits (none set by default) | The extra options bits (none set by default) | |||
| Which performance optimizations the compiler should apply | ||||
| A compile context is also required if you are using custom memory management. If none of these | A compile context is also required if you are using custom memory management. If none of these | |||
| apply, just pass NULL as the context argument of pcre2_compile(). | apply, just pass NULL as the context argument of pcre2_compile(). | |||
| A compile context is created, copied, and freed by the following functions: | A compile context is created, copied, and freed by the following functions: | |||
| pcre2_compile_context *pcre2_compile_context_create( | pcre2_compile_context *pcre2_compile_context_create( | |||
| pcre2_general_context *gcontext); | pcre2_general_context *gcontext); | |||
| pcre2_compile_context *pcre2_compile_context_copy( | pcre2_compile_context *pcre2_compile_context_copy( | |||
| skipping to change at line 689 | skipping to change at line 699 | |||
| where running out of stack is to be avoided at all costs. The parenthesis limit above cannot | where running out of stack is to be avoided at all costs. The parenthesis limit above cannot | |||
| take account of how much stack is actually available during compilation. For a finer control, | take account of how much stack is actually available during compilation. For a finer control, | |||
| you can supply a function that is called whenever pcre2_compile() starts to compile a parenthe‐ | you can supply a function that is called whenever pcre2_compile() starts to compile a parenthe‐ | |||
| sized part of a pattern. This function can check the actual stack size (or anything else that it | sized part of a pattern. This function can check the actual stack size (or anything else that it | |||
| wants to, of course). | wants to, of course). | |||
| The first argument to the callout function gives the current depth of nesting, and the second is | The first argument to the callout function gives the current depth of nesting, and the second is | |||
| user data that is set up by the last argument of pcre2_set_compile_recursion_guard(). The call‐ | user data that is set up by the last argument of pcre2_set_compile_recursion_guard(). The call‐ | |||
| out function should return zero if all is well, or non-zero to force an error. | out function should return zero if all is well, or non-zero to force an error. | |||
| int pcre2_set_optimize(pcre2_compile_context *ccontext, | ||||
| uint32_t directive); | ||||
| PCRE2 can apply various performance optimizations during compilation, in order to make matching | ||||
| faster. For example, the compiler might convert some regex constructs into an equivalent con‐ | ||||
| struct which pcre2_match() can execute faster. By default, all available optimizations are en‐ | ||||
| abled. However, in rare cases, one might wish to disable specific optimizations. For example, if | ||||
| it is known that some optimizations cannot benefit a certain regex, it might be desirable to | ||||
| disable them, in order to speed up compilation. | ||||
| The permitted values of directive are as follows: | ||||
| PCRE2_OPTIMIZATION_FULL | ||||
| Enable all optional performance optimizations. This is the default value. | ||||
| PCRE2_OPTIMIZATION_NONE | ||||
| Disable all optional performance optimizations. | ||||
| PCRE2_AUTO_POSSESS | ||||
| PCRE2_AUTO_POSSESS_OFF | ||||
| Enable/disable "auto-possessification" of variable quantifiers such as * and +. This optimiza‐ | ||||
| tion, for example, turns a+b into a++b in order to avoid backtracks into a+ that can never be | ||||
| successful. However, if callouts are in use, auto-possessification means that some callouts are | ||||
| never taken. You can disable this optimization if you want the matching functions to do a full, | ||||
| unoptimized search and run all the callouts. | ||||
| PCRE2_DOTSTAR_ANCHOR | ||||
| PCRE2_DOTSTAR_ANCHOR_OFF | ||||
| Enable/disable an optimization that is applied when .* is the first significant item in a top- | ||||
| level branch of a pattern, and all the other branches also start with .* or with \A or \G or ^. | ||||
| Such a pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items and | ||||
| PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match must start either | ||||
| at the start of the subject or following a newline is remembered. Like other optimizations, this | ||||
| can cause callouts to be skipped. | ||||
| Dotstar anchor optimization is automatically disabled for .* if it is inside an atomic group or | ||||
| a capture group that is the subject of a backreference, or if the pattern contains (*PRUNE) or | ||||
| (*SKIP). | ||||
| PCRE2_START_OPTIMIZE | ||||
| PCRE2_START_OPTIMIZE_OFF | ||||
| Enable/disable optimizations which cause matching functions to scan the subject string for spe‐ | ||||
| cific code unit values before attempting a match. For example, if it is known that an unanchored | ||||
| match must start with a specific value, the matching code searches the subject for that value, | ||||
| and fails immediately if it cannot find it, without actually running the main matching function. | ||||
| This means that a special item such as (*COMMIT) at the start of a pattern is not considered un‐ | ||||
| til after a suitable starting point for the match has been found. Also, when callouts or | ||||
| (*MARK) items are in use, these "start-up" optimizations can cause them to be skipped if the | ||||
| pattern is never actually used. The start-up optimizations are in effect a pre-scan of the sub‐ | ||||
| ject that takes place before the pattern is run. | ||||
| Disabling start-up optimizations ensures that in cases where the result is "no match", the call‐ | ||||
| outs do occur, and that items such as (*COMMIT) and (*MARK) are considered at every possible | ||||
| starting position in the subject string. | ||||
| Disabling start-up optimizations may change the outcome of a matching operation. Consider the | ||||
| pattern | ||||
| (*COMMIT)ABC | ||||
| When this is compiled, PCRE2 records the fact that a match must start with the character "A". | ||||
| Suppose the subject string is "DEFABC". The start-up optimization scans along the subject, finds | ||||
| "A" and runs the first match attempt from there. The (*COMMIT) item means that the pattern must | ||||
| match the current starting position, which in this case, it does. However, if the same match is | ||||
| run without start-up optimizations, the initial scan along the subject string does not happen. | ||||
| The first match attempt is run starting from "D" and when this fails, (*COMMIT) prevents any | ||||
| further matches being tried, so the overall result is "no match". | ||||
| Another start-up optimization makes use of a minimum length for a matching subject, which is | ||||
| recorded when possible. Consider the pattern | ||||
| (*MARK:1)B(*MARK:2)(X|Y) | ||||
| The minimum length for a match is two characters. If the subject is "XXBB", the "starting char‐ | ||||
| acter" optimization skips "XX", then tries to match "BB", which is long enough. In the process, | ||||
| (*MARK:2) is encountered and remembered. When the match attempt fails, the next "B" is found, | ||||
| but there is only one character left, so there are no more attempts, and "no match" is returned | ||||
| with the "last mark seen" set to "2". Without start-up optimizations, however, matches are tried | ||||
| at every possible starting position, including at the end of the subject, where (*MARK:1) is en‐ | ||||
| countered, but there is no "B", so the "last mark seen" that is returned is "1". In this case, | ||||
| the optimizations do not affect the overall match result, which is still "no match", but they do | ||||
| affect the auxiliary information that is returned. | ||||
| The match context | The match context | |||
| A match context is required if you want to: | A match context is required if you want to: | |||
| Set up a callout function | Set up a callout function | |||
| Set an offset limit for matching an unanchored pattern | Set an offset limit for matching an unanchored pattern | |||
| Change the limit on the amount of heap used when matching | Change the limit on the amount of heap used when matching | |||
| Change the backtracking match limit | Change the backtracking match limit | |||
| Change the backtracking depth limit | Change the backtracking depth limit | |||
| Set custom memory management specifically for the match | Set custom memory management specifically for the match | |||
| If none of these apply, just pass NULL as the context argument of pcre2_match(), | If none of these apply, just pass NULL as the context argument of pcre2_match(), | |||
| pcre2_dfa_match(), or pcre2_jit_match(). | pcre2_dfa_match(), or pcre2_jit_match(). | |||
| A match context is created, copied, and freed by the following functions: | A match context is created, copied, and freed by the following functions: | |||
| pcre2_match_context *pcre2_match_context_create( | pcre2_match_context *pcre2_match_context_create( | |||
| pcre2_general_context *gcontext); | pcre2_general_context *gcontext); | |||
| pcre2_match_context *pcre2_match_context_copy( | pcre2_match_context *pcre2_match_context_copy( | |||
| pcre2_match_context *mcontext); | pcre2_match_context *mcontext); | |||
| void pcre2_match_context_free(pcre2_match_context *mcontext); | void pcre2_match_context_free(pcre2_match_context *mcontext); | |||
| A match context is created with default values for its parameters. These can be changed by call‐ | A match context is created with default values for its parameters. These can be changed by call‐ | |||
| ing the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data | ing the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data | |||
| is detected. | is detected. | |||
| int pcre2_set_callout(pcre2_match_context *mcontext, | int pcre2_set_callout(pcre2_match_context *mcontext, | |||
| int (*callout_function)(pcre2_callout_block *, void *), | int (*callout_function)(pcre2_callout_block *, void *), | |||
| void *callout_data); | void *callout_data); | |||
| This sets up a callout function for PCRE2 to call at specified points during a matching opera‐ | This sets up a callout function for PCRE2 to call at specified points during a matching opera‐ | |||
| tion. Details are given in the pcre2callout documentation. | tion. Details are given in the pcre2callout documentation. | |||
| int pcre2_set_substitute_callout(pcre2_match_context *mcontext, | int pcre2_set_substitute_callout(pcre2_match_context *mcontext, | |||
| int (*callout_function)(pcre2_substitute_callout_block *, void *), | int (*callout_function)(pcre2_substitute_callout_block *, void *), | |||
| void *callout_data); | void *callout_data); | |||
| This sets up a callout function for PCRE2 to call after each substitution made by pcre2_substi‐ | This sets up a callout function for PCRE2 to call after each substitution made by pcre2_substi‐ | |||
| tute(). Details are given in the section entitled "Creating a new string with substitutions" be‐ | tute(). Details are given in the section entitled "Creating a new string with substitutions" be‐ | |||
| low. | low. | |||
| int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, | ||||
| PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, | ||||
| PCRE2_UCHAR *, PCRE2_SIZE, | ||||
| int, void *), | ||||
| void *callout_data); | ||||
| This sets up a callout function for PCRE2 to call when performing case transformations inside | ||||
| pcre2_substitute(). Details are given in the section entitled "Creating a new string with sub‐ | ||||
| stitutions" below. | ||||
| int pcre2_set_offset_limit(pcre2_match_context *mcontext, | int pcre2_set_offset_limit(pcre2_match_context *mcontext, | |||
| PCRE2_SIZE value); | PCRE2_SIZE value); | |||
| The offset_limit parameter limits how far an unanchored search can advance in the subject | The offset_limit parameter limits how far an unanchored search can advance in the subject | |||
| string. The default value is PCRE2_UNSET. The pcre2_match() and pcre2_dfa_match() functions re‐ | string. The default value is PCRE2_UNSET. The pcre2_match() and pcre2_dfa_match() functions re‐ | |||
| turn PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given offset is not | turn PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given offset is not | |||
| found. The pcre2_substitute() function makes no more substitutions. | found. The pcre2_substitute() function makes no more substitutions. | |||
| For example, if the pattern /abc/ is matched against "123abc" with an offset limit less than 3, | For example, if the pattern /abc/ is matched against "123abc" with an offset limit less than 3, | |||
| the result is PCRE2_ERROR_NOMATCH. A match can never be found if the startoffset argument of | the result is PCRE2_ERROR_NOMATCH. A match can never be found if the startoffset argument of | |||
| pcre2_match(), pcre2_dfa_match(), or pcre2_substitute() is greater than the offset limit set in | pcre2_match(), pcre2_dfa_match(), or pcre2_substitute() is greater than the offset limit set in | |||
| the match context. | the match context. | |||
| When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT option when calling pcre2_com‐ | When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT option when calling pcre2_com‐ | |||
| pile() so that when JIT is in use, different code can be compiled. If a match is started with a | pile() so that when JIT is in use, different code can be compiled. If a match is started with a | |||
| non-default offset limit when PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. | non-default offset limit when PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. | |||
| The offset limit facility can be used to track progress when searching large subject strings or | The offset limit facility can be used to track progress when searching large subject strings or | |||
| to limit the extent of global substitutions. See also the PCRE2_FIRSTLINE option, which requires | to limit the extent of global substitutions. See also the PCRE2_FIRSTLINE option, which requires | |||
| a match to start before or at the first newline that follows the start of matching in the sub‐ | a match to start before or at the first newline that follows the start of matching in the sub‐ | |||
| ject. If this is set with an offset limit, a match must occur in the first line and also within | ject. If this is set with an offset limit, a match must occur in the first line and also within | |||
| the offset limit. In other words, whichever limit comes first is used. | the offset limit. In other words, whichever limit comes first is used. | |||
| int pcre2_set_heap_limit(pcre2_match_context *mcontext, | int pcre2_set_heap_limit(pcre2_match_context *mcontext, | |||
| uint32_t value); | uint32_t value); | |||
| The heap_limit parameter specifies, in units of kibibytes (1024 bytes), the maximum amount of | The heap_limit parameter specifies, in units of kibibytes (1024 bytes), the maximum amount of | |||
| heap memory that pcre2_match() may use to hold backtracking information when running an inter‐ | heap memory that pcre2_match() may use to hold backtracking information when running an inter‐ | |||
| pretive match. This limit also applies to pcre2_dfa_match(), which may use the heap when pro‐ | pretive match. This limit also applies to pcre2_dfa_match(), which may use the heap when pro‐ | |||
| cessing patterns with a lot of nested pattern recursion or lookarounds or atomic groups. This | cessing patterns with a lot of nested pattern recursion or lookarounds or atomic groups. This | |||
| limit does not apply to matching with the JIT optimization, which has its own memory control | limit does not apply to matching with the JIT optimization, which has its own memory control | |||
| arrangements (see the pcre2jit documentation for more details). If the limit is reached, the | arrangements (see the pcre2jit documentation for more details). If the limit is reached, the | |||
| negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2 | negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2 | |||
| is built; if it is not, the default is set very large and is essentially unlimited. | is built; if it is not, the default is set very large and is essentially unlimited. | |||
| A value for the heap limit may also be supplied by an item at the start of a pattern of the form | A value for the heap limit may also be supplied by an item at the start of a pattern of the form | |||
| (*LIMIT_HEAP=ddd) | (*LIMIT_HEAP=ddd) | |||
| where ddd is a decimal number. However, such a setting is ignored unless ddd is less than the | where ddd is a decimal number. However, such a setting is ignored unless ddd is less than the | |||
| limit set by the caller of pcre2_match() or, if no such limit is set, less than the default. | limit set by the caller of pcre2_match() or, if no such limit is set, less than the default. | |||
| The pcre2_match() function always needs some heap memory, so setting a value of zero guarantees | The pcre2_match() function always needs some heap memory, so setting a value of zero guarantees | |||
| a "heap limit exceeded" error. Details of how pcre2_match() uses the heap are given in the | a "heap limit exceeded" error. Details of how pcre2_match() uses the heap are given in the | |||
| pcre2perform documentation. | pcre2perform documentation. | |||
| For pcre2_dfa_match(), a vector on the system stack is used when processing pattern recursions, | For pcre2_dfa_match(), a vector on the system stack is used when processing pattern recursions, | |||
| lookarounds, or atomic groups, and only if this is not big enough is heap memory used. In this | lookarounds, or atomic groups, and only if this is not big enough is heap memory used. In this | |||
| case, setting a value of zero disables the use of the heap. | case, setting a value of zero disables the use of the heap. | |||
| int pcre2_set_match_limit(pcre2_match_context *mcontext, | int pcre2_set_match_limit(pcre2_match_context *mcontext, | |||
| uint32_t value); | uint32_t value); | |||
| The match_limit parameter provides a means of preventing PCRE2 from using up too many computing | The match_limit parameter provides a means of preventing PCRE2 from using up too many computing | |||
| resources when processing patterns that are not going to match, but which have a very large num‐ | resources when processing patterns that are not going to match, but which have a very large num‐ | |||
| ber of possibilities in their search trees. The classic example is a pattern that uses nested | ber of possibilities in their search trees. The classic example is a pattern that uses nested | |||
| unlimited repeats. | unlimited repeats. | |||
| There is an internal counter in pcre2_match() that is incremented each time round its main | There is an internal counter in pcre2_match() that is incremented each time round its main | |||
| matching loop. If this value reaches the match limit, pcre2_match() returns the negative value | matching loop. If this value reaches the match limit, pcre2_match() returns the negative value | |||
| PCRE2_ERROR_MATCHLIMIT. This has the effect of limiting the amount of backtracking that can take | PCRE2_ERROR_MATCHLIMIT. This has the effect of limiting the amount of backtracking that can take | |||
| place. For patterns that are not anchored, the count restarts from zero for each position in the | place. For patterns that are not anchored, the count restarts from zero for each position in the | |||
| subject string. This limit also applies to pcre2_dfa_match(), though the counting is done in a | subject string. This limit also applies to pcre2_dfa_match(), though the counting is done in a | |||
| different way. | different way. | |||
| When pcre2_match() is called with a pattern that was successfully processed by pcre2_jit_com‐ | When pcre2_match() is called with a pattern that was successfully processed by pcre2_jit_com‐ | |||
| pile(), the way in which matching is executed is entirely different. However, there is still the | pile(), the way in which matching is executed is entirely different. However, there is still the | |||
| possibility of runaway matching that goes on for a very long time, and so the match_limit value | possibility of runaway matching that goes on for a very long time, and so the match_limit value | |||
| is also used in this case (but in a different way) to limit how long the matching can continue. | is also used in this case (but in a different way) to limit how long the matching can continue. | |||
| The default value for the limit can be set when PCRE2 is built; the default is 10 million, which | The default value for the limit can be set when PCRE2 is built; the default is 10 million, which | |||
| handles all but the most extreme cases. A value for the match limit may also be supplied by an | handles all but the most extreme cases. A value for the match limit may also be supplied by an | |||
| item at the start of a pattern of the form | item at the start of a pattern of the form | |||
| (*LIMIT_MATCH=ddd) | (*LIMIT_MATCH=ddd) | |||
| where ddd is a decimal number. However, such a setting is ignored unless ddd is less than the | where ddd is a decimal number. However, such a setting is ignored unless ddd is less than the | |||
| limit set by the caller of pcre2_match() or pcre2_dfa_match() or, if no such limit is set, less | limit set by the caller of pcre2_match() or pcre2_dfa_match() or, if no such limit is set, less | |||
| than the default. | than the default. | |||
| int pcre2_set_depth_limit(pcre2_match_context *mcontext, | int pcre2_set_depth_limit(pcre2_match_context *mcontext, | |||
| uint32_t value); | uint32_t value); | |||
| This parameter limits the depth of nested backtracking in pcre2_match(). Each time a nested | This parameter limits the depth of nested backtracking in pcre2_match(). Each time a nested | |||
| backtracking point is passed, a new memory frame is used to remember the state of matching at | backtracking point is passed, a new memory frame is used to remember the state of matching at | |||
| that point. Thus, this parameter indirectly limits the amount of memory that is used in a match. | that point. Thus, this parameter indirectly limits the amount of memory that is used in a match. | |||
| However, because the size of each memory frame depends on the number of capturing parentheses, | However, because the size of each memory frame depends on the number of capturing parentheses, | |||
| the actual memory limit varies from pattern to pattern. This limit was more useful in versions | the actual memory limit varies from pattern to pattern. This limit was more useful in versions | |||
| before 10.30, where function recursion was used for backtracking. | before 10.30, where function recursion was used for backtracking. | |||
| The depth limit is not relevant, and is ignored, when matching is done using JIT compiled code. | The depth limit is not relevant, and is ignored, when matching is done using JIT compiled code. | |||
| However, it is supported by pcre2_dfa_match(), which uses it to limit the depth of nested inter‐ | However, it is supported by pcre2_dfa_match(), which uses it to limit the depth of nested inter‐ | |||
| nal recursive function calls that implement atomic groups, lookaround assertions, and pattern | nal recursive function calls that implement atomic groups, lookaround assertions, and pattern | |||
| recursions. This limits, indirectly, the amount of system stack that is used. It was more useful | recursions. This limits, indirectly, the amount of system stack that is used. It was more useful | |||
| in versions before 10.32, when stack memory was used for local workspace vectors for recursive | in versions before 10.32, when stack memory was used for local workspace vectors for recursive | |||
| function calls. From version 10.32, only local variables are allocated on the stack and as each | function calls. From version 10.32, only local variables are allocated on the stack and as each | |||
| call uses only a few hundred bytes, even a small stack can support quite a lot of recursion. | call uses only a few hundred bytes, even a small stack can support quite a lot of recursion. | |||
| If the depth of internal recursive function calls is great enough, local workspace vectors are | If the depth of internal recursive function calls is great enough, local workspace vectors are | |||
| allocated on the heap from version 10.32 onwards, so the depth limit also indirectly limits the | allocated on the heap from version 10.32 onwards, so the depth limit also indirectly limits the | |||
| amount of heap memory that is used. A recursive pattern such as /(.(?2))((?1)|)/, when matched | amount of heap memory that is used. A recursive pattern such as /(.(?2))((?1)|)/, when matched | |||
| to a very long string using pcre2_dfa_match(), can use a great deal of memory. However, it is | to a very long string using pcre2_dfa_match(), can use a great deal of memory. However, it is | |||
| probably better to limit heap usage directly by calling pcre2_set_heap_limit(). | probably better to limit heap usage directly by calling pcre2_set_heap_limit(). | |||
| The default value for the depth limit can be set when PCRE2 is built; if it is not, the default | The default value for the depth limit can be set when PCRE2 is built; if it is not, the default | |||
| is set to the same value as the default for the match limit. If the limit is exceeded, | is set to the same value as the default for the match limit. If the limit is exceeded, | |||
| pcre2_match() or pcre2_dfa_match() returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit | pcre2_match() or pcre2_dfa_match() returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit | |||
| may also be supplied by an item at the start of a pattern of the form | may also be supplied by an item at the start of a pattern of the form | |||
| (*LIMIT_DEPTH=ddd) | (*LIMIT_DEPTH=ddd) | |||
| where ddd is a decimal number. However, such a setting is ignored unless ddd is less than the | where ddd is a decimal number. However, such a setting is ignored unless ddd is less than the | |||
| limit set by the caller of pcre2_match() or pcre2_dfa_match() or, if no such limit is set, less | limit set by the caller of pcre2_match() or pcre2_dfa_match() or, if no such limit is set, less | |||
| than the default. | than the default. | |||
| CHECKING BUILD-TIME OPTIONS | CHECKING BUILD-TIME OPTIONS | |||
| int pcre2_config(uint32_t what, void *where); | int pcre2_config(uint32_t what, void *where); | |||
| The function pcre2_config() makes it possible for a PCRE2 client to | The function pcre2_config() makes it possible for a PCRE2 client | |||
| find the value of certain | to find the value of certain | |||
| configuration parameters and to discover which optional features | configuration parameters and to discover which optional features hav | |||
| have been compiled into the | e been compiled into the | |||
| PCRE2 library. The pcre2build documentation has more details about these features. | PCRE2 library. The pcre2build documentation has more details about these features. | |||
| The first argument for pcre2_config() specifies which information is | The first argument for pcre2_config() specifies which information i | |||
| required. The second argu‐ | s required. The second argu‐ | |||
| ment is a pointer to memory into which the information is placed. | ment is a pointer to memory into which the information is placed. If | |||
| If NULL is passed, the func‐ | NULL is passed, the func‐ | |||
| tion returns the amount of memory that is needed for the requested i | tion returns the amount of memory that is needed for the requested | |||
| nformation. For calls that | information. For calls that | |||
| return numerical values, the value is in bytes; when requesting these values, where should point | return numerical values, the value is in bytes; when requesting these values, where should point | |||
| to appropriately aligned memory. For calls that return strings, the required length is given in | to appropriately aligned memory. For calls that return strings, the required length is given in | |||
| code units, not counting the terminating zero. | code units, not counting the terminating zero. | |||
| When requesting information, the returned value from pcre2_config() | When requesting information, the returned value from pcre2_config() | |||
| is non-negative on success, | is non-negative on success, | |||
| or the negative error code PCRE2_ERROR_BADOPTION if the value in th | or the negative error code PCRE2_ERROR_BADOPTION if the value in the | |||
| e first argument is not rec‐ | first argument is not rec‐ | |||
| ognized. The following information is available: | ognized. The following information is available: | |||
| PCRE2_CONFIG_BSR | PCRE2_CONFIG_BSR | |||
| The output is a uint32_t integer whose value indicates what character sequences the \R escape | The output is a uint32_t integer whose value indicates what character sequences the \R escape | |||
| sequence matches by default. A value of PCRE2_BSR_UNICODE means that \R matches any Unicode line | sequence matches by default. A value of PCRE2_BSR_UNICODE means that \R matches any Unicode line | |||
| ending sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The | ending sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The | |||
| default can be overridden when a pattern is compiled. | default can be overridden when a pattern is compiled. | |||
| PCRE2_CONFIG_COMPILED_WIDTHS | PCRE2_CONFIG_COMPILED_WIDTHS | |||
| The output is a uint32_t integer whose lower bits indicate which code unit widths were selected | The output is a uint32_t integer whose lower bits indicate which code unit widths were selected | |||
| when PCRE2 was built. The 1-bit indicates 8-bit support, and the 2-bit and 4-bit indicate 16-bit | when PCRE2 was built. The 1-bit indicates 8-bit support, and the 2-bit and 4-bit indicate 16-bit | |||
| and 32-bit support, respectively. | and 32-bit support, respectively. | |||
| PCRE2_CONFIG_DEPTHLIMIT | PCRE2_CONFIG_DEPTHLIMIT | |||
| The output is a uint32_t integer that gives the default limit for the depth of nested backtrack‐ | The output is a uint32_t integer that gives the default limit for the depth of nested backtrack‐ | |||
| ing in pcre2_match() or the depth of nested recursions, lookarounds, and atomic groups in | ing in pcre2_match() or the depth of nested recursions, lookarounds, and atomic groups in | |||
| pcre2_dfa_match(). Further details are given with pcre2_set_depth_limit() above. | pcre2_dfa_match(). Further details are given with pcre2_set_depth_limit() above. | |||
| PCRE2_CONFIG_HEAPLIMIT | PCRE2_CONFIG_HEAPLIMIT | |||
| The output is a uint32_t integer that gives, in kibibytes, the defau | The output is a uint32_t integer that gives, in kibibytes, the def | |||
| lt limit for the amount of | ault limit for the amount of | |||
| heap memory used by pcre2_match() or pcre2_dfa_match(). Furth | heap memory used by pcre2_match() or pcre2_dfa_match(). Furthe | |||
| er details are given with | r details are given with | |||
| pcre2_set_heap_limit() above. | pcre2_set_heap_limit() above. | |||
| PCRE2_CONFIG_JIT | PCRE2_CONFIG_JIT | |||
| The output is a uint32_t integer that is set to one if support for just-in-time compiling is in‐ | The output is a uint32_t integer that is set to one if support for just-in-time compiling is in‐ | |||
| cluded in the library; otherwise it is set to zero. Note that having | cluded in the library; otherwise it is set to zero. Note that havin | |||
| the support in the library | g the support in the library | |||
| does not guarantee that JIT will be used for any given match. See th | does not guarantee that JIT will be used for any given match, and ne | |||
| e pcre2jit documentation for | ither does it guarantee that | |||
| more details. | JIT will actually be able to function, because it may not be able to | |||
| allocate executable memory | ||||
| in some environments. There is a special call to pcre2_jit_compile | ||||
| () that can be used to check | ||||
| this. See the pcre2jit documentation for more details. | ||||
| PCRE2_CONFIG_JITTARGET | PCRE2_CONFIG_JITTARGET | |||
| The where argument should point to a buffer that is at least 48 | The where argument should point to a buffer that is at least 48 cod | |||
| code units long. (The exact | e units long. (The exact | |||
| length required can be found by calling pcre2_config() with where se | length required can be found by calling pcre2_config() with where | |||
| t to NULL.) The buffer is | set to NULL.) The buffer is | |||
| filled with a string that contains the name of the architecture f | filled with a string that contains the name of the architecture for | |||
| or which the JIT compiler is | which the JIT compiler is | |||
| configured, for example "x86 32bit (little endian + unaligned)". If | configured, for example "x86 32bit (little endian + unaligned)". | |||
| JIT support is not avail‐ | If JIT support is not avail‐ | |||
| able, PCRE2_ERROR_BADOPTION is returned, otherwise the number of | able, PCRE2_ERROR_BADOPTION is returned, otherwise the number of cod | |||
| code units used is returned. | e units used is returned. | |||
| This is the length of the string, plus one unit for the terminating zero. | This is the length of the string, plus one unit for the terminating zero. | |||
| PCRE2_CONFIG_LINKSIZE | PCRE2_CONFIG_LINKSIZE | |||
| The output is a uint32_t integer that contains the number of bytes used for internal linkage in | The output is a uint32_t integer that contains the number of bytes used for internal linkage in | |||
| compiled regular expressions. When PCRE2 is configured, the value can be set to 2, 3, or 4, with | compiled regular expressions. When PCRE2 is configured, the value can be set to 2, 3, or 4, with | |||
| the default being 2. This is the value that is returned by pcre2_config(). However, when the | the default being 2. This is the value that is returned by pcre2_config(). However, when the | |||
| 16-bit library is compiled, a value of 3 is rounded up to 4, and when the 32-bit library is com‐ | 16-bit library is compiled, a value of 3 is rounded up to 4, and when the 32-bit library is com‐ | |||
| piled, internal linkages always use 4 bytes, so the configured value is not relevant. | piled, internal linkages always use 4 bytes, so the configured value is not relevant. | |||
| The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all but the most | The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all but the most | |||
| massive patterns, since it allows the size of the compiled pattern to be up to 65535 code units. | massive patterns, since it allows the size of the compiled pattern to be up to 65535 code units. | |||
| Larger values allow larger regular expressions to be compiled by those two libraries, but at the | Larger values allow larger regular expressions to be compiled by those two libraries, but at the | |||
| expense of slower matching. | expense of slower matching. | |||
| PCRE2_CONFIG_MATCHLIMIT | PCRE2_CONFIG_MATCHLIMIT | |||
| The output is a uint32_t integer that gives the default match limit for pcre2_match(). Further | The output is a uint32_t integer that gives the default match limit for pcre2_match(). Further | |||
| details are given with pcre2_set_match_limit() above. | details are given with pcre2_set_match_limit() above. | |||
| PCRE2_CONFIG_NEWLINE | PCRE2_CONFIG_NEWLINE | |||
| The output is a uint32_t integer whose value specifies the default character sequence that is | The output is a uint32_t integer whose value specifies the default character sequence that is | |||
| recognized as meaning "newline". The values are: | recognized as meaning "newline". The values are: | |||
| PCRE2_NEWLINE_CR Carriage return (CR) | PCRE2_NEWLINE_CR Carriage return (CR) | |||
| PCRE2_NEWLINE_LF Linefeed (LF) | PCRE2_NEWLINE_LF Linefeed (LF) | |||
| PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) | PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) | |||
| PCRE2_NEWLINE_ANY Any Unicode line ending | PCRE2_NEWLINE_ANY Any Unicode line ending | |||
| PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF | PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF | |||
| PCRE2_NEWLINE_NUL The NUL character (binary zero) | PCRE2_NEWLINE_NUL The NUL character (binary zero) | |||
| The default should normally correspond to the standard sequence for your operating system. | The default should normally correspond to the standard sequence for your operating system. | |||
| PCRE2_CONFIG_NEVER_BACKSLASH_C | PCRE2_CONFIG_NEVER_BACKSLASH_C | |||
| The output is a uint32_t integer that is set to one if the use of \C was permanently disabled | The output is a uint32_t integer that is set to one if the use of \C was permanently disabled | |||
| when PCRE2 was built; otherwise it is set to zero. | when PCRE2 was built; otherwise it is set to zero. | |||
| PCRE2_CONFIG_PARENSLIMIT | PCRE2_CONFIG_PARENSLIMIT | |||
| The output is a uint32_t integer that gives the maximum depth of nes | The output is a uint32_t integer that gives the maximum depth of ne | |||
| ting of parentheses (of any | sting of parentheses (of any | |||
| kind) in a pattern. This limit is imposed to cap the amount of syst | kind) in a pattern. This limit is imposed to cap the amount of syste | |||
| em stack used when a pattern | m stack used when a pattern | |||
| is compiled. It is specified when PCRE2 is built; the default is 250 | is compiled. It is specified when PCRE2 is built; the default is 2 | |||
| . This limit does not take | 50. This limit does not take | |||
| into account the stack that may already be used by the calling ap | into account the stack that may already be used by the calling appli | |||
| plication. For finer control | cation. For finer control | |||
| over compilation stack usage, see pcre2_set_compile_recursion_guard(). | over compilation stack usage, see pcre2_set_compile_recursion_guard(). | |||
| PCRE2_CONFIG_STACKRECURSE | PCRE2_CONFIG_STACKRECURSE | |||
| This parameter is obsolete and should not be used in new code. The output is a uint32_t integer | This parameter is obsolete and should not be used in new code. The output is a uint32_t integer | |||
| that is always set to zero. | that is always set to zero. | |||
| PCRE2_CONFIG_TABLES_LENGTH | PCRE2_CONFIG_TABLES_LENGTH | |||
| The output is a uint32_t integer that gives the length of PCRE2's character processing tables in | The output is a uint32_t integer that gives the length of PCRE2's character processing tables in | |||
| bytes. For details of these tables see the section on locale support below. | bytes. For details of these tables see the section on locale support below. | |||
| PCRE2_CONFIG_UNICODE_VERSION | PCRE2_CONFIG_UNICODE_VERSION | |||
| The where argument should point to a buffer that is at least 24 | The where argument should point to a buffer that is at least 24 cod | |||
| code units long. (The exact | e units long. (The exact | |||
| length required can be found by calling pcre2_config() with where se | length required can be found by calling pcre2_config() with wher | |||
| t to NULL.) If PCRE2 has | e set to NULL.) If PCRE2 has | |||
| been compiled without Unicode support, the buffer is filled wit | been compiled without Unicode support, the buffer is filled with th | |||
| h the text "Unicode not sup‐ | e text "Unicode not sup‐ | |||
| ported". Otherwise, the Unicode version string (for example, "8.0.0") is inserted. The number of | ported". Otherwise, the Unicode version string (for example, "8.0.0") is inserted. The number of | |||
| code units used is returned. This is the length of the string plus one unit for the terminating | code units used is returned. This is the length of the string plus one unit for the terminating | |||
| zero. | zero. | |||
| PCRE2_CONFIG_UNICODE | PCRE2_CONFIG_UNICODE | |||
| The output is a uint32_t integer that is set to one if Unicode support is available; otherwise | The output is a uint32_t integer that is set to one if Unicode support is available; otherwise | |||
| it is set to zero. Unicode support implies UTF support. | it is set to zero. Unicode support implies UTF support. | |||
| PCRE2_CONFIG_VERSION | PCRE2_CONFIG_VERSION | |||
| The where argument should point to a buffer that is at least 24 cod | The where argument should point to a buffer that is at least 24 | |||
| e units long. (The exact | code units long. (The exact | |||
| length required can be found by calling pcre2_config() with where | length required can be found by calling pcre2_config() with where se | |||
| set to NULL.) The buffer is | t to NULL.) The buffer is | |||
| filled with the PCRE2 version string, zero-terminated. The number of | filled with the PCRE2 version string, zero-terminated. The numb | |||
| code units used is re‐ | er of code units used is re‐ | |||
| turned. This is the length of the string plus one unit for the terminating zero. | turned. This is the length of the string plus one unit for the terminating zero. | |||
| COMPILING A PATTERN | COMPILING A PATTERN | |||
| pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, | pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, | |||
| uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, | uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, | |||
| pcre2_compile_context *ccontext); | pcre2_compile_context *ccontext); | |||
| void pcre2_code_free(pcre2_code *code); | void pcre2_code_free(pcre2_code *code); | |||
| pcre2_code *pcre2_code_copy(const pcre2_code *code); | pcre2_code *pcre2_code_copy(const pcre2_code *code); | |||
| pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); | pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); | |||
| The pcre2_compile() function compiles a pattern into an internal form. The pattern is defined | The pcre2_compile() function compiles a pattern into an internal form. The pattern is defined | |||
| by a pointer to a string of code units and a length in code units. If the pattern is zero-termi‐ | by a pointer to a string of code units and a length in code units. If the pattern is zero-termi‐ | |||
| nated, the length can be specified as PCRE2_ZERO_TERMINATED. A NUL | nated, the length can be specified as PCRE2_ZERO_TERMINATED. A | |||
| L pattern pointer with a | NULL pattern pointer with a | |||
| length of zero is treated as an empty string (NULL with a non-zer | length of zero is treated as an empty string (NULL with a non-zero l | |||
| o length causes an error re‐ | ength causes an error re‐ | |||
| turn). The function returns a pointer to a block of memory that cont | turn). The function returns a pointer to a block of memory that c | |||
| ains the compiled pattern | ontains the compiled pattern | |||
| and related data, or NULL if an error occurred. | and related data, or NULL if an error occurred. | |||
| If the compile context argument ccontext is NULL, memory for the compiled pattern is obtained by | If the compile context argument ccontext is NULL, memory for the compiled pattern is obtained by | |||
| calling malloc(). Otherwise, it is obtained from the same memory fu | calling malloc(). Otherwise, it is obtained from the same memory fun | |||
| nction that was used for the | ction that was used for the | |||
| compile context. The caller must free the memory by calling pcre2_c | compile context. The caller must free the memory by calling pcr | |||
| ode_free() when it is no | e2_code_free() when it is no | |||
| longer needed. If pcre2_code_free() is called with a NULL argum | longer needed. If pcre2_code_free() is called with a NULL argument | |||
| ent, it returns immediately, | , it returns immediately, | |||
| without doing anything. | without doing anything. | |||
| The function pcre2_code_copy() makes a copy of the compiled code in | The function pcre2_code_copy() makes a copy of the compiled code i | |||
| new memory, using the same | n new memory, using the same | |||
| memory allocator as was used for the original. However, if the co | memory allocator as was used for the original. However, if the code | |||
| de has been processed by the | has been processed by the | |||
| JIT compiler (see below), the JIT information cannot be copied (beca | JIT compiler (see below), the JIT information cannot be copied (b | |||
| use it is position-depen‐ | ecause it is position-depen‐ | |||
| dent). The new copy can initially be used only for non-JIT matching, though it can be passed to | dent). The new copy can initially be used only for non-JIT matching, though it can be passed to | |||
| pcre2_jit_compile() if required. If pcre2_code_copy() is called with a NULL argument, it returns | pcre2_jit_compile() if required. If pcre2_code_copy() is called with a NULL argument, it returns | |||
| NULL. | NULL. | |||
| The pcre2_code_copy() function provides a way for individual threads in a multithreaded applica‐ | The pcre2_code_copy() function provides a way for individual threads in a multithreaded applica‐ | |||
| tion to acquire a private copy of shared compiled code. However, it does not make a copy of the | tion to acquire a private copy of shared compiled code. However, it does not make a copy of the | |||
| character tables used by the compiled pattern; the new pattern code points to the same tables as | character tables used by the compiled pattern; the new pattern code points to the same tables as | |||
| the original code. (See "Locale Support" below for details of thes | the original code. (See "Locale Support" below for details of these | |||
| e character tables.) In many | character tables.) In many | |||
| applications the same tables are used throughout, so this behaviour | applications the same tables are used throughout, so this behavio | |||
| is appropriate. Neverthe‐ | ur is appropriate. Neverthe‐ | |||
| less, there are occasions when a copy of a compiled pattern and the | less, there are occasions when a copy of a compiled pattern and the | |||
| relevant tables are needed. | relevant tables are needed. | |||
| The pcre2_code_copy_with_tables() provides this facility. Copies of | The pcre2_code_copy_with_tables() provides this facility. Copies | |||
| both the code and the ta‐ | of both the code and the ta‐ | |||
| bles are made, with the new code pointing to the new tables. The | bles are made, with the new code pointing to the new tables. The mem | |||
| memory for the new tables is | ory for the new tables is | |||
| automatically freed when pcre2_code_free() is called for the new cop | automatically freed when pcre2_code_free() is called for the new c | |||
| y of the compiled code. If | opy of the compiled code. If | |||
| pcre2_code_copy_with_tables() is called with a NULL argument, it returns NULL. | pcre2_code_copy_with_tables() is called with a NULL argument, it returns NULL. | |||
| NOTE: When one of the matching functions is called, pointers to | NOTE: When one of the matching functions is called, pointers to the | |||
| the compiled pattern and the | compiled pattern and the | |||
| subject string are set in the match data block so that they can be r | subject string are set in the match data block so that they can be | |||
| eferenced by the substring | referenced by the substring | |||
| extraction functions after a successful match. After running a mat | extraction functions after a successful match. After running a matc | |||
| ch, you must not free a com‐ | h, you must not free a com‐ | |||
| piled pattern or a subject string until after all operations on the | piled pattern or a subject string until after all operations on the | |||
| match data block have taken | match data block have taken | |||
| place, unless, in the case of the subject string, you have used th | place, unless, in the case of the subject string, you have used the | |||
| e PCRE2_COPY_MATCHED_SUBJECT | PCRE2_COPY_MATCHED_SUBJECT | |||
| option, which is described in the section entitled "Option bits for pcre2_match()" below. | option, which is described in the section entitled "Option bits for pcre2_match()" below. | |||
| The options argument for pcre2_compile() contains various bit settings that affect the compila‐ | The options argument for pcre2_compile() contains various bit settings that affect the compila‐ | |||
| tion. It should be zero if none of them are required. The available options are described below. | tion. It should be zero if none of them are required. The available options are described below. | |||
| Some of them (in particular, those that are compatible with Perl, | Some of them (in particular, those that are compatible with Perl, bu | |||
| but some others as well) can | t some others as well) can | |||
| also be set and unset from within the pattern (see the detailed desc | also be set and unset from within the pattern (see the detailed des | |||
| ription in the pcre2pattern | cription in the pcre2pattern | |||
| documentation). | documentation). | |||
| For those options that can be different in different parts of the | For those options that can be different in different parts of the pa | |||
| pattern, the contents of the | ttern, the contents of the | |||
| options argument specifies their settings at the start of compil | options argument specifies their settings at the start of comp | |||
| ation. The PCRE2_ANCHORED, | ilation. The PCRE2_ANCHORED, | |||
| PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK options can be set at the | PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK options can be set at the | |||
| time of matching as well as | time of matching as well as | |||
| at compile time. | at compile time. | |||
| Some additional options and less frequently required compile-time parameters (for example, the | Some additional options and less frequently required compile-time parameters (for example, the | |||
| newline setting) can be provided in a compile context (as described above). | newline setting) can be provided in a compile context (as described above). | |||
| If errorcode or erroroffset is NULL, pcre2_compile() returns NULL | If errorcode or erroroffset is NULL, pcre2_compile() returns NULL i | |||
| immediately. Otherwise, the | mmediately. Otherwise, the | |||
| variables to which these point are set to an error code and an offse | variables to which these point are set to an error code and an o | |||
| t (number of code units) | ffset (number of code units) | |||
| within the pattern, respectively, when pcre2_compile() returns NULL | within the pattern, respectively, when pcre2_compile() returns NULL | |||
| because a compilation error | because a compilation error | |||
| has occurred. | has occurred. | |||
| There are nearly 100 positive error codes that pcre2_compile() may r | There are over 100 positive error codes that pcre2_compile() may re | |||
| eturn if it finds an error | turn if it finds an error in | |||
| in the pattern. There are also some negative error codes that are | the pattern. There are also some negative error codes that are used | |||
| used for invalid UTF strings | for invalid UTF strings when | |||
| when validity checking is in force. These are the same as g | validity checking is in force. These are the same as give | |||
| iven by pcre2_match() and | n by pcre2_match() and | |||
| pcre2_dfa_match(), and are described in the pcre2unicode documentation. There is no separate | pcre2_dfa_match(), and are described in the pcre2unicode documentation. There is no separate | |||
| documentation for the positive error codes, because the textual error messages that are obtained | documentation for the positive error codes, because the textual error messages that are obtained | |||
| by calling the pcre2_get_error_message() function (see "Obtaining a textual error message" be‐ | by calling the pcre2_get_error_message() function (see "Obtaining a textual error message" be‐ | |||
| low) should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined for both | low) should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined for both | |||
| positive and negative error codes in pcre2.h. When compilation is successful errorcode is set to | positive and negative error codes in pcre2.h. When compilation is successful errorcode is set to | |||
| a value that returns the message "no error" if passed to pcre2_get_error_message(). | a value that returns the message "no error" if passed to pcre2_get_error_message(). | |||
| The value returned in erroroffset is an indication of where in the pattern an error occurred. | The value returned in erroroffset is an indication of where in the pattern an error occurred. | |||
| When there is no error, zero is returned. A non-zero value is not necessarily the furthest point | When there is no error, zero is returned. A non-zero value is not necessarily the furthest point | |||
| in the pattern that was read. For example, after the error "lookbehind assertion is not fixed | in the pattern that was read. For example, after the error "lookbehind assertion is not fixed | |||
| skipping to change at line 1132 | skipping to change at line 1242 | |||
| placement strings passed to pcre2_substitute(). | placement strings passed to pcre2_substitute(). | |||
| PCRE2_ALT_CIRCUMFLEX | PCRE2_ALT_CIRCUMFLEX | |||
| In multiline mode (when PCRE2_MULTILINE is set), the circumflex metacharacter matches at the | In multiline mode (when PCRE2_MULTILINE is set), the circumflex metacharacter matches at the | |||
| start of the subject (unless PCRE2_NOTBOL is set), and also after any internal newline. However, | start of the subject (unless PCRE2_NOTBOL is set), and also after any internal newline. However, | |||
| it does not match after a newline at the end of the subject, for compatibility with Perl. If you | it does not match after a newline at the end of the subject, for compatibility with Perl. If you | |||
| want a multiline circumflex also to match after a terminating newline, you must set | want a multiline circumflex also to match after a terminating newline, you must set | |||
| PCRE2_ALT_CIRCUMFLEX. | PCRE2_ALT_CIRCUMFLEX. | |||
| PCRE2_ALT_EXTENDED_CLASS | ||||
| Alters the parsing of character classes to follow the extended sy | ||||
| ntax described by Unicode | ||||
| UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no impact on the be | ||||
| haviour of the Perl-specific | ||||
| "(?[...])" syntax for extended classes, but instead enables the alte | ||||
| rnative syntax of extended | ||||
| class behaviour inside ordinary "[...]" character classes. See th | ||||
| e pcre2pattern documentation | ||||
| for details of the character classes supported. | ||||
| PCRE2_ALT_VERBNAMES | PCRE2_ALT_VERBNAMES | |||
| By default, for compatibility with Perl, the name in any verb sequence such as (*MARK:NAME) is | By default, for compatibility with Perl, the name in any verb sequence such as (*MARK:NAME) is | |||
| any sequence of characters that does not include a closing parenthesis. The name is not | any sequence of characters that does not include a closing parenthesis. The name is not | |||
| processed in any way, and it is not possible to include a closing parenthesis in the name. How‐ | processed in any way, and it is not possible to include a closing parenthesis in the name. How‐ | |||
| ever, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing is applied to verb | ever, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing is applied to verb | |||
| names and only an unescaped closing parenthesis terminates the name. A closing parenthesis can | names and only an unescaped closing parenthesis terminates the name. A closing parenthesis can | |||
| be included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED or PCRE2_EX‐ | be included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED or PCRE2_EX‐ | |||
| TENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped whitespace in verb names is | TENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped whitespace in verb names is | |||
| skipped and #-comments are recognized, exactly as in the rest of the pattern. | skipped and #-comments are recognized, exactly as in the rest of the pattern. | |||
| skipping to change at line 1155 | skipping to change at line 1273 | |||
| If this bit is set, pcre2_compile() automatically inserts callout items, all with number 255, | If this bit is set, pcre2_compile() automatically inserts callout items, all with number 255, | |||
| before each pattern item, except immediately before or after an explicit callout in the pattern. | before each pattern item, except immediately before or after an explicit callout in the pattern. | |||
| For discussion of the callout facility, see the pcre2callout documentation. | For discussion of the callout facility, see the pcre2callout documentation. | |||
| PCRE2_CASELESS | PCRE2_CASELESS | |||
| If this bit is set, letters in the pattern match both upper and lower case letters in the sub‐ | If this bit is set, letters in the pattern match both upper and lower case letters in the sub‐ | |||
| ject. It is equivalent to Perl's /i option, and it can be changed within a pattern by a (?i) op‐ | ject. It is equivalent to Perl's /i option, and it can be changed within a pattern by a (?i) op‐ | |||
| tion setting. If either PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all char‐ | tion setting. If either PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all char‐ | |||
| acters with more than one other case, and for all characters whose code points are greater than | acters with more than one other case, and for all characters whose code points are greater than | |||
| U+007F. Note that there are two ASCII characters, K and S, that, in | U+007F. | |||
| addition to their lower case | ||||
| ASCII equivalents, are case-equivalent with U+212A (Kelvin sign) and | Note that there are two ASCII characters, K and S, that, in addition | |||
| U+017F (long S) respec‐ | to their lower case ASCII | |||
| tively. If you do not want this case equivalence, you can supp | equivalents, are case-equivalent with U+212A (Kelvin sign) and U+01 | |||
| ress it by setting PCRE2_EX‐ | 7F (long S) respectively. If | |||
| TRA_CASELESS_RESTRICT. | you do not want this case equivalence, you can suppress it by setti | |||
| ng PCRE2_EXTRA_CASELESS_RE‐ | ||||
| For lower valued characters with only one other case, a lookup table | STRICT. | |||
| is used for speed. When | ||||
| neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used f | One language family, Turkish and Azeri, has its own case-insensiti | |||
| or all code points less than | vity rules, which can be se‐ | |||
| 256, and higher code points (available only in 16-bit or 32-bit mode | lected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the behavi | |||
| ) are treated as not having | our of the 'i', 'I', U+0130 | |||
| (capital I with dot above), and U+0131 (small dotless i) characters. | ||||
| For lower valued characters with only one other case, a lookup t | ||||
| able is used for speed. When | ||||
| neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used for a | ||||
| ll code points less than | ||||
| 256, and higher code points (available only in 16-bit or 32-bit mod | ||||
| e) are treated as not having | ||||
| another case. | another case. | |||
| From release 10.45 PCRE2_CASELESS also affects what some of the lett | ||||
| er-related Unicode property | ||||
| escapes (\p and \P) match. The properties Lu (upper case letter), Ll | ||||
| (lower case letter), and Lt | ||||
| (title case letter) are all treated as LC (cased letter) when PCRE2_ | ||||
| CASELESS is set. | ||||
| PCRE2_DOLLAR_ENDONLY | PCRE2_DOLLAR_ENDONLY | |||
| If this bit is set, a dollar metacharacter in the pattern matches only at the end of the subject | If this bit is set, a dollar metacharacter in the pattern matches only at the end of the subject | |||
| string. Without this option, a dollar also matches immediately before a newline at the end of | string. Without this option, a dollar also matches immediately before a newline at the end of | |||
| the string (but not before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored if | the string (but not before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored if | |||
| PCRE2_MULTILINE is set. There is no equivalent to this option in Perl, and no way to set it | PCRE2_MULTILINE is set. There is no equivalent to this option in Perl, and no way to set it | |||
| within a pattern. | within a pattern. | |||
| PCRE2_DOTALL | PCRE2_DOTALL | |||
| skipping to change at line 1325 | skipping to change at line 1453 | |||
| ing point in the middle of a multi-code-unit character. This opti on may be useful in applica‐ | ing point in the middle of a multi-code-unit character. This opti on may be useful in applica‐ | |||
| tions that process patterns from external sources. Note that there i s also a build-time option | tions that process patterns from external sources. Note that there i s also a build-time option | |||
| that permanently locks out the use of \C. | that permanently locks out the use of \C. | |||
| PCRE2_NEVER_UCP | PCRE2_NEVER_UCP | |||
| This option locks out the use of Unicode properties for handling \B, \b, \D, \d, \S, \s, \W, \w, | This option locks out the use of Unicode properties for handling \B, \b, \D, \d, \S, \s, \W, \w, | |||
| and some of the POSIX character classes, as described for the PCRE2_UCP option below. In partic‐ | and some of the POSIX character classes, as described for the PCRE2_UCP option below. In partic‐ | |||
| ular, it prevents the creator of the pattern from enabling this facility by starting the pattern | ular, it prevents the creator of the pattern from enabling this facility by starting the pattern | |||
| with (*UCP). This option may be useful in applications that process patterns from external | with (*UCP). This option may be useful in applications that process patterns from external | |||
| sources. The option combination PCRE_UCP and PCRE_NEVER_UCP causes a n error. | sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. | |||
| PCRE2_NEVER_UTF | PCRE2_NEVER_UTF | |||
| This option locks out interpretation of the pattern as UTF-8, UTF-16 , or UTF-32, depending on | This option locks out interpretation of the pattern as UTF-8, UTF-16 , or UTF-32, depending on | |||
| which library is in use. In particular, it prevents the creator of t he pattern from switching to | which library is in use. In particular, it prevents the creator of t he pattern from switching to | |||
| UTF interpretation by starting the pattern with (*UTF). This opti on may be useful in applica‐ | UTF interpretation by starting the pattern with (*UTF). This opti on may be useful in applica‐ | |||
| tions that process patterns from external sources. The combi nation of PCRE2_UTF and | tions that process patterns from external sources. The combi nation of PCRE2_UTF and | |||
| PCRE2_NEVER_UTF causes an error. | PCRE2_NEVER_UTF causes an error. | |||
| PCRE2_NO_AUTO_CAPTURE | PCRE2_NO_AUTO_CAPTURE | |||
| If this option is set, it disables the use of numbered capturing par entheses in the pattern. Any | If this option is set, it disables the use of numbered capturing par entheses in the pattern. Any | |||
| opening parenthesis that is not followed by ? behaves as if it w ere followed by ?: but named | opening parenthesis that is not followed by ? behaves as if it w ere followed by ?: but named | |||
| parentheses can still be used for capturing (and they acquire number s in the usual way). This is | parentheses can still be used for capturing (and they acquire number s in the usual way). This is | |||
| the same as Perl's /n option. Note that, when this option is set, r eferences to capture groups | the same as Perl's /n option. Note that, when this option is set, r eferences to capture groups | |||
| (backreferences or recursion/subroutine calls) may only refer to nam ed groups, though the refer‐ | (backreferences or recursion/subroutine calls) may only refer to nam ed groups, though the refer‐ | |||
| ence can be by name or by number. | ence can be by name or by number. | |||
| PCRE2_NO_AUTO_POSSESS | PCRE2_NO_AUTO_POSSESS | |||
| If this option is set, it disables "auto-possessification", which | If this (deprecated) option is set, it disables "auto-possessifica | |||
| is an optimization that, for | tion", which is an optimiza‐ | |||
| example, turns a+b into a++b in order to avoid backtracks into a+ th | tion that, for example, turns a+b into a++b in order to avoid backtr | |||
| at can never be successful. | acks into a+ that can never | |||
| However, if callouts are in use, auto-possessification means that so | be successful. However, if callouts are in use, auto-possessificat | |||
| me callouts are never taken. | ion means that some callouts | |||
| You can set this option if you want the matching functions to do a | are never taken. You can set this option if you want the matching fu | |||
| full unoptimized search and | nctions to do a full unopti‐ | |||
| run all the callouts, but it is mainly provided for testing purposes | mized search and run all the callouts, but it is mainly provided for | |||
| . | testing purposes. | |||
| If a compile context is available, it is recommended to use pcre2_se | ||||
| t_optimize() with the direc‐ | ||||
| tive PCRE2_AUTO_POSSESS_OFF rather than the compile option PCRE2_ | ||||
| NO_AUTO_POSSESS. Note that | ||||
| PCRE2_NO_AUTO_POSSESS takes precedence over the pcre2_set_optimi | ||||
| ze() optimization directives | ||||
| PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF. | ||||
| PCRE2_NO_DOTSTAR_ANCHOR | PCRE2_NO_DOTSTAR_ANCHOR | |||
| If this option is set, it disables an optimization that is applied w | If this (deprecated) option is set, it disables an optimization that | |||
| hen .* is the first signifi‐ | is applied when .* is the | |||
| cant item in a top-level branch of a pattern, and all the other bran | first significant item in a top-level branch of a pattern, and all t | |||
| ches also start with .* or | he other branches also start | |||
| with \A or \G or ^. The optimization is automatically disabled for | with .* or with \A or \G or ^. The optimization is automatically dis | |||
| .* if it is inside an atomic | abled for .* if it is inside | |||
| group or a capture group that is the subject of a backreference, or | an atomic group or a capture group that is the subject of a back | |||
| if the pattern contains | reference, or if the pattern | |||
| (*PRUNE) or (*SKIP). When the optimization is not disabled, such a | contains (*PRUNE) or (*SKIP). When the optimization is not disabled, | |||
| pattern is automatically an‐ | such a pattern is automati‐ | |||
| chored if PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILI | cally anchored if PCRE2_DOTALL is set for all the .* items and PCRE2 | |||
| NE is not set for any ^ | _MULTILINE is not set for | |||
| items. Otherwise, the fact that any match must start either at the | any ^ items. Otherwise, the fact that any match must start either at | |||
| start of the subject or fol‐ | the start of the subject or | |||
| lowing a newline is remembered. Like other optimizations, this can c | following a newline is remembered. Like other optimizations, t | |||
| ause callouts to be skipped. | his can cause callouts to be | |||
| skipped. (If a compile context is available, it is recommended to u | ||||
| se pcre2_set_optimize() with | ||||
| the directive PCRE2_DOTSTAR_ANCHOR_OFF instead.) | ||||
| PCRE2_NO_START_OPTIMIZE | PCRE2_NO_START_OPTIMIZE | |||
| This is an option whose main effect is at matching time. It does not change what pcre2_compile() | This is an option whose main effect is at matching time. It does not change what pcre2_compile() | |||
| generates, but it does affect the output of the JIT compiler. | generates, but it does affect the output of the JIT compiler. Settin | |||
| g this option is equivalent | ||||
| to calling pcre2_set_optimize() with the directive parameter set to | ||||
| PCRE2_START_OPTIMIZE_OFF. | ||||
| There are a number of optimizations that may occur at the start of a match, in order to speed up | There are a number of optimizations that may occur at the start of a match, in order to speed up | |||
| the process. For example, if it is known that an unanchored match mu st start with a specific | the process. For example, if it is known that an unanchored matc h must start with a specific | |||
| code unit value, the matching code searches the subject for that val ue, and fails immediately if | code unit value, the matching code searches the subject for that val ue, and fails immediately if | |||
| it cannot find it, without actually running the main matching func | it cannot find it, without actually running the main matching functi | |||
| tion. This means that a spe‐ | on. The start-up optimiza‐ | |||
| cial item such as (*COMMIT) at the start of a pattern is not conside | tions are in effect a pre-scan of the subject that takes place befor | |||
| red until after a suitable | e the pattern is run. | |||
| starting point for the match has been found. Also, when callouts | ||||
| or (*MARK) items are in use, | ||||
| these "start-up" optimizations can cause them to be skipped if the | ||||
| pattern is never actually | ||||
| used. The start-up optimizations are in effect a pre-scan of the sub | ||||
| ject that takes place before | ||||
| the pattern is run. | ||||
| The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizatio | ||||
| ns, possibly causing perfor‐ | ||||
| mance to suffer, but ensuring that in cases where the result is "no | ||||
| match", the callouts do oc‐ | ||||
| cur, and that items such as (*COMMIT) and (*MARK) are considered at | ||||
| every possible starting po‐ | ||||
| sition in the subject string. | ||||
| Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a match | ||||
| ing operation. Consider the | ||||
| pattern | ||||
| (*COMMIT)ABC | ||||
| When this is compiled, PCRE2 records the fact that a match must star | ||||
| t with the character "A". | ||||
| Suppose the subject string is "DEFABC". The start-up optimization sc | ||||
| ans along the subject, finds | ||||
| "A" and runs the first match attempt from there. The (*COMMIT) item | ||||
| means that the pattern must | ||||
| match the current starting position, which in this case, it does. Ho | ||||
| wever, if the same match is | ||||
| run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the sub | ||||
| ject string does not happen. | ||||
| The first match attempt is run starting from "D" and when this f | ||||
| ails, (*COMMIT) prevents any | ||||
| further matches being tried, so the overall result is "no match". | ||||
| As another start-up optimization makes use of a minimum length for a | Disabling the start-up optimizations may cause performance to suff | |||
| matching subject, which is | er. However, this may be de‐ | |||
| recorded when possible. Consider the pattern | sirable for patterns which contain callouts or items such as (*COMM | |||
| IT) and (*MARK). See the | ||||
| (*MARK:1)B(*MARK:2)(X|Y) | above description of PCRE2_START_OPTIMIZE_OFF for further details. | |||
| The minimum length for a match is two characters. If the subject is | ||||
| "XXBB", the "starting char‐ | ||||
| acter" optimization skips "XX", then tries to match "BB", which is l | ||||
| ong enough. In the process, | ||||
| (*MARK:2) is encountered and remembered. When the match attempt f | ||||
| ails, the next "B" is found, | ||||
| but there is only one character left, so there are no more attempts, | ||||
| and "no match" is returned | ||||
| with the "last mark seen" set to "2". If NO_START_OPTIMIZE is set, h | ||||
| owever, matches are tried at | ||||
| every possible starting position, including at the end of the sub | ||||
| ject, where (*MARK:1) is en‐ | ||||
| countered, but there is no "B", so the "last mark seen" that is retu | ||||
| rned is "1". In this case, | ||||
| the optimizations do not affect the overall match result, which is s | ||||
| till "no match", but they do | ||||
| affect the auxiliary information that is returned. | ||||
| PCRE2_NO_UTF_CHECK | PCRE2_NO_UTF_CHECK | |||
| When PCRE2_UTF is set, the validity of the pattern as a UTF stri ng is automatically checked. | When PCRE2_UTF is set, the validity of the pattern as a UTF stri ng is automatically checked. | |||
| There are discussions about the validity of UTF-8 strings, UTF-16 st rings, and UTF-32 strings in | There are discussions about the validity of UTF-8 strings, UTF-16 st rings, and UTF-32 strings in | |||
| the pcre2unicode document. If an invalid UTF sequence is found, pcre 2_compile() returns a nega‐ | the pcre2unicode document. If an invalid UTF sequence is found, pcre 2_compile() returns a nega‐ | |||
| tive error code. | tive error code. | |||
| If you know that your pattern is a valid UTF string, and you want to skip this check for perfor‐ | If you know that your pattern is a valid UTF string, and you want to skip this check for perfor‐ | |||
| mance reasons, you can set the PCRE2_NO_UTF_CHECK option. When it i s set, the effect of passing | mance reasons, you can set the PCRE2_NO_UTF_CHECK option. When it i s set, the effect of passing | |||
| skipping to change at line 1444 | skipping to change at line 1547 | |||
| This option has two effects. Firstly, it changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, | This option has two effects. Firstly, it changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, | |||
| \W, \w, and some of the POSIX character classes. By default, only ASCII characters are recog‐ | \W, \w, and some of the POSIX character classes. By default, only ASCII characters are recog‐ | |||
| nized, but if PCRE2_UCP is set, Unicode properties are used to classify characters. There are | nized, but if PCRE2_UCP is set, Unicode properties are used to classify characters. There are | |||
| some PCRE2_EXTRA options (see below) that add finer control to this behaviour. More details are | some PCRE2_EXTRA options (see below) that add finer control to this behaviour. More details are | |||
| given in the section on generic character types in the pcre2pattern page. | given in the section on generic character types in the pcre2pattern page. | |||
| The second effect of PCRE2_UCP is to force the use of Unicode prope rties for upper/lower casing | The second effect of PCRE2_UCP is to force the use of Unicode prope rties for upper/lower casing | |||
| operations, even when PCRE2_UTF is not set. This makes it possible t o process strings in the | operations, even when PCRE2_UTF is not set. This makes it possible t o process strings in the | |||
| 16-bit UCS-2 code. This option is available only if PCRE2 has been c ompiled with Unicode support | 16-bit UCS-2 code. This option is available only if PCRE2 has been c ompiled with Unicode support | |||
| (which is the default). The PCRE2_EXTRA_CASELESS_RESTRICT option (s | (which is the default). | |||
| ee below) restricts caseless | ||||
| matching such that ASCII characters match only ASCII characters an | The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts casel | |||
| d non-ASCII characters match | ess matching such that ASCII | |||
| only non-ASCII characters. | characters match only ASCII characters and non-ASCII characters matc | |||
| h only non-ASCII characters. | ||||
| The PCRE2_EXTRA_TURKISH_CASING option (see above) alters the match | ||||
| ing of the 'i' characters to | ||||
| follow their behaviour in Turkish and Azeri languages. For further d | ||||
| etails on PCRE2_EXTRA_CASE‐ | ||||
| LESS_RESTRICT and PCRE2_EXTRA_TURKISH_CASING, see the pcre2unicode p | ||||
| age. | ||||
| PCRE2_UNGREEDY | PCRE2_UNGREEDY | |||
| This option inverts the "greediness" of the quantifiers so that they | This option inverts the "greediness" of the quantifiers so that the | |||
| are not greedy by default, | y are not greedy by default, | |||
| but become greedy if followed by "?". It is not compatible with P | but become greedy if followed by "?". It is not compatible with Perl | |||
| erl. It can also be set by a | . It can also be set by a | |||
| (?U) option setting within the pattern. | (?U) option setting within the pattern. | |||
| PCRE2_USE_OFFSET_LIMIT | PCRE2_USE_OFFSET_LIMIT | |||
| This option must be set for pcre2_compile() if pcre2_set_offset_limi t() is going to be used to | This option must be set for pcre2_compile() if pcre2_set_offset_li mit() is going to be used to | |||
| set a non-default offset limit in a match context for matches that u se this pattern. An error is | set a non-default offset limit in a match context for matches that u se this pattern. An error is | |||
| generated if an offset limit is set without this option. For more | generated if an offset limit is set without this option. For more de | |||
| details, see the description | tails, see the description | |||
| of pcre2_set_offset_limit() in the section that describes matc | of pcre2_set_offset_limit() in the section that describes mat | |||
| h contexts. See also the | ch contexts. See also the | |||
| PCRE2_FIRSTLINE option above. | PCRE2_FIRSTLINE option above. | |||
| PCRE2_UTF | PCRE2_UTF | |||
| This option causes PCRE2 to regard both the pattern and the sub ject strings that are subse‐ | This option causes PCRE2 to regard both the pattern and the subject strings that are subse‐ | |||
| quently processed as strings of UTF characters instead of single-cod e-unit strings. It is avail‐ | quently processed as strings of UTF characters instead of single-cod e-unit strings. It is avail‐ | |||
| able when PCRE2 is built to include Unicode support (which is the de fault). If Unicode support | able when PCRE2 is built to include Unicode support (which is the default). If Unicode support | |||
| is not available, the use of this option provokes an error. Details of how PCRE2_UTF changes the | is not available, the use of this option provokes an error. Details of how PCRE2_UTF changes the | |||
| behaviour of PCRE2 are given in the pcre2unicode page. In particul ar, note that it changes the | behaviour of PCRE2 are given in the pcre2unicode page. In particular , note that it changes the | |||
| way PCRE2_CASELESS works. | way PCRE2_CASELESS works. | |||
| Extra compile options | Extra compile options | |||
| The option bits that can be set in a compile context by calling the pcre2_set_compile_extra_op‐ | The option bits that can be set in a compile context by calling the pcre2_set_compile_extra_op‐ | |||
| tions() function are as follows: | tions() function are as follows: | |||
| PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK | PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK | |||
| Since release 10.38 PCRE2 has forbidden the use of \K within look | Since release 10.38 PCRE2 has forbidden the use of \K within lookar | |||
| around assertions, following | ound assertions, following | |||
| Perl's lead. This option is provided to re-enable the previous b | Perl's lead. This option is provided to re-enable the previou | |||
| ehaviour (act in positive | s behaviour (act in positive | |||
| lookarounds, ignore in negative ones) in case anybody is relying on it. | lookarounds, ignore in negative ones) in case anybody is relying on it. | |||
| PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES | PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES | |||
| This option applies when compiling a pattern in UTF-8 or UTF-32 mod | This option applies when compiling a pattern in UTF-8 or UTF-32 mode | |||
| e. It is forbidden in UTF-16 | . It is forbidden in UTF-16 | |||
| mode, and ignored in non-UTF modes. Unicode "surrogate" code points | mode, and ignored in non-UTF modes. Unicode "surrogate" code p | |||
| in the range 0xd800 to | oints in the range 0xd800 to | |||
| 0xdfff are used in pairs in UTF-16 to encode code points with va | 0xdfff are used in pairs in UTF-16 to encode code points with values | |||
| lues in the range 0x10000 to | in the range 0x10000 to | |||
| 0x10ffff. The surrogates cannot therefore be represented in UTF-16. | 0x10ffff. The surrogates cannot therefore be represented in UTF-16 | |||
| They can be represented in | . They can be represented in | |||
| UTF-8 and UTF-32, but are defined as invalid code points, and caus | UTF-8 and UTF-32, but are defined as invalid code points, and cause | |||
| e errors if encountered in a | errors if encountered in a | |||
| UTF-8 or UTF-32 string that is being checked for validity by PCRE2. | UTF-8 or UTF-32 string that is being checked for validity by PCRE2. | |||
| These values also cause errors if encountered in escape sequences su ch as \x{d912} within a pat‐ | These values also cause errors if encountered in escape sequences su ch as \x{d912} within a pat‐ | |||
| tern. However, it seems that some applications, when using PCRE2 to | tern. However, it seems that some applications, when using PCRE2 t | |||
| check for unwanted charac‐ | o check for unwanted charac‐ | |||
| ters in UTF-8 strings, explicitly test for the surrogates u | ters in UTF-8 strings, explicitly test for the surrogates us | |||
| sing escape sequences. The | ing escape sequences. The | |||
| PCRE2_NO_UTF_CHECK option does not disable the error that occurs, be cause it applies only to the | PCRE2_NO_UTF_CHECK option does not disable the error that occurs, be cause it applies only to the | |||
| testing of input strings for UTF validity. | testing of input strings for UTF validity. | |||
| If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surr ogate code point values in | If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, su rrogate code point values in | |||
| UTF-8 and UTF-32 patterns no longer provoke errors and are incorpora ted in the compiled pattern. | UTF-8 and UTF-32 patterns no longer provoke errors and are incorpora ted in the compiled pattern. | |||
| However, they can only match subject characters if the matc hing function is called with | However, they can only match subject characters if the matching function is called with | |||
| PCRE2_NO_UTF_CHECK set. | PCRE2_NO_UTF_CHECK set. | |||
| PCRE2_EXTRA_ALT_BSUX | PCRE2_EXTRA_ALT_BSUX | |||
| The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, a | The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u | |||
| nd \x in the way that EC‐ | , and \x in the way that EC‐ | |||
| MAscript (aka JavaScript) does. Additional functionality was defi | MAscript (aka JavaScript) does. Additional functionality was defined | |||
| ned by ECMAscript 6; setting | by ECMAscript 6; setting | |||
| PCRE2_EXTRA_ALT_BSUX has the effect of PCRE2_ALT_BSUX, but in additi | PCRE2_EXTRA_ALT_BSUX has the effect of PCRE2_ALT_BSUX, but in addi | |||
| on it recognizes \u{hhh..} | tion it recognizes \u{hhh..} | |||
| as a hexadecimal character code, where hhh.. is any number of hexade cimal digits. | as a hexadecimal character code, where hhh.. is any number of hexade cimal digits. | |||
| PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_BSD | |||
| This option forces \d to match only ASCII digits, even when PCRE2_UCP is set. It can be changed | This option forces \d to match only ASCII digits, even when PCRE2_UCP is set. It can be changed | |||
| within a pattern by means of the (?aD) option setting. | within a pattern by means of the (?aD) option setting. | |||
| PCRE2_EXTRA_ASCII_BSS | PCRE2_EXTRA_ASCII_BSS | |||
| This option forces \s to match only ASCII space characters, even w hen PCRE2_UCP is set. It can | This option forces \s to match only ASCII space characters, even whe n PCRE2_UCP is set. It can | |||
| be changed within a pattern by means of the (?aS) option setting. | be changed within a pattern by means of the (?aS) option setting. | |||
| PCRE2_EXTRA_ASCII_BSW | PCRE2_EXTRA_ASCII_BSW | |||
| This option forces \w to match only ASCII word characters, even when PCRE2_UCP is set. It can be | This option forces \w to match only ASCII word characters, even when PCRE2_UCP is set. It can be | |||
| changed within a pattern by means of the (?aW) option setting. | changed within a pattern by means of the (?aW) option setting. | |||
| PCRE2_EXTRA_ASCII_DIGIT | PCRE2_EXTRA_ASCII_DIGIT | |||
| This option forces the POSIX character classes [:digit:] and [:xdigit:] to match only ASCII dig‐ | This option forces the POSIX character classes [:digit:] and [:xdigit:] to match only ASCII dig‐ | |||
| its, even when PCRE2_UCP is set. It can be changed within a pattern by means of the (?aT) option | its, even when PCRE2_UCP is set. It can be changed within a pattern by means of the (?aT) option | |||
| setting. | setting. | |||
| PCRE2_EXTRA_ASCII_POSIX | PCRE2_EXTRA_ASCII_POSIX | |||
| This option forces all the POSIX character classes, including [:digit:] and [:xdigit:], to match | This option forces all the POSIX character classes, including [:digit:] and [:xdigit:], to match | |||
| only ASCII characters, even when PCRE2_UCP is set. It can be changed | only ASCII characters, even when PCRE2_UCP is set. It can be chang | |||
| within a pattern by means | ed within a pattern by means | |||
| of the (?aP) option setting, but note that this also sets PCRE2_E | of the (?aP) option setting, but note that this also sets PCRE2_EXTR | |||
| XTRA_ASCII_DIGIT in order to | A_ASCII_DIGIT in order to | |||
| ensure that (?-aP) unsets all ASCII restrictions for POSIX classes. | ensure that (?-aP) unsets all ASCII restrictions for POSIX classes. | |||
| PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL | PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL | |||
| This is a dangerous option. Use with care. By default, an unrecogniz | This is a dangerous option. Use with care. By default, an unrecog | |||
| ed escape such as \j or a | nized escape such as \j or a | |||
| malformed one such as \x{2z} causes a compile-time error when detec | malformed one such as \x{2z} causes a compile-time error when detect | |||
| ted by pcre2_compile(). Perl | ed by pcre2_compile(). Perl | |||
| is somewhat inconsistent in handling such items: for example, \j is | is somewhat inconsistent in handling such items: for example, \j | |||
| treated as a literal "j", | is treated as a literal "j", | |||
| and non-hexadecimal digits in \x{} are just ignored, though warning | and non-hexadecimal digits in \x{} are just ignored, though warnings | |||
| s are given in both cases if | are given in both cases if | |||
| Perl's warning switch is enabled. However, a malformed octal number | Perl's warning switch is enabled. However, a malformed octal numbe | |||
| after \o{ always causes an | r after \o{ always causes an | |||
| error in Perl. | error in Perl. | |||
| If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to pcre2_compile(), all unrecog‐ | If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to pcre2_compile(), all unrecog‐ | |||
| nized or malformed escape sequences are treated as single-character | nized or malformed escape sequences are treated as single-character | |||
| escapes. For example, \j is | escapes. For example, \j is | |||
| a literal "j" and \x{2z} is treated as the literal string "x{2z}". | a literal "j" and \x{2z} is treated as the literal string "x{2z} | |||
| Setting this option means | ". Setting this option means | |||
| that typos in patterns may go undetected and have unexpected result | that typos in patterns may go undetected and have unexpected results | |||
| s. Also note that a sequence | . Also note that a sequence | |||
| such as [\N{] is interpreted as a malformed attempt at [\N{...}] an | such as [\N{] is interpreted as a malformed attempt at [\N{... | |||
| d so is treated as [N{] | }] and so is treated as [N{] | |||
| whereas [\N] gives an error because an unqualified \N is a valid escape sequence but is not sup‐ | whereas [\N] gives an error because an unqualified \N is a valid escape sequence but is not sup‐ | |||
| ported in a character class. To reiterate: this is a dangerous option. Use with great care. | ported in a character class. To reiterate: this is a dangerous option. Use with great care. | |||
| PCRE2_EXTRA_CASELESS_RESTRICT | PCRE2_EXTRA_CASELESS_RESTRICT | |||
| When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follow s Unicode rules, which allow | When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode rules, which allow | |||
| for more than two cases per character. There are two case-equivalent character sets that contain | for more than two cases per character. There are two case-equivalent character sets that contain | |||
| both ASCII and non-ASCII characters. The ASCII letter S is case-equivalent to U+017f (long S) | both ASCII and non-ASCII characters. The ASCII letter S is case-equivalent to U+017f (long S) | |||
| and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables recogni‐ | and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables recogni‐ | |||
| tion of case-equivalences that cross the ASCII/non-ASCII boundar | tion of case-equivalences that cross the ASCII/non-ASCII boundary. I | |||
| y. In a caseless match, both | n a caseless match, both | |||
| characters must either be ASCII or non-ASCII. The option can be chan | characters must either be ASCII or non-ASCII. The option can be cha | |||
| ged with a pattern by the | nged within a pattern by the | |||
| (?r) option setting. | (*CASELESS_RESTRICT) or (?r) option settings. | |||
| PCRE2_EXTRA_ESCAPED_CR_IS_LF | PCRE2_EXTRA_ESCAPED_CR_IS_LF | |||
| There are some legacy applications where the escape sequence \r | There are some legacy applications where the escape sequence \r in | |||
| in a pattern is expected to | a pattern is expected to | |||
| match a newline. If this option is set, \r in a pattern is converted | match a newline. If this option is set, \r in a pattern is converte | |||
| to \n so that it matches a | d to \n so that it matches a | |||
| LF (linefeed) instead of a CR (carriage return) character. The opti | LF (linefeed) instead of a CR (carriage return) character. The optio | |||
| on does not affect a literal | n does not affect a literal | |||
| CR in the pattern, nor does it affect CR specified as an explicit co de point such as \x{0D}. | CR in the pattern, nor does it affect CR specified as an explicit co de point such as \x{0D}. | |||
| PCRE2_EXTRA_MATCH_LINE | PCRE2_EXTRA_MATCH_LINE | |||
| This option is provided for use by the -x option of pcre2grep. It ca | This option is provided for use by the -x option of pcre2grep. I | |||
| uses the pattern only to | t causes the pattern only to | |||
| match complete lines. This is achieved by automatically insertin | match complete lines. This is achieved by automatically inserting th | |||
| g the code for "^(?:" at the | e code for "^(?:" at the | |||
| start of the compiled pattern and ")$" at the end. Thus, when PC | start of the compiled pattern and ")$" at the end. Thus, when | |||
| RE2_MULTILINE is set, the | PCRE2_MULTILINE is set, the | |||
| matched line may be in the middle of the subject string. This option can be used with PCRE2_LIT‐ | matched line may be in the middle of the subject string. This option can be used with PCRE2_LIT‐ | |||
| ERAL. | ERAL. | |||
| PCRE2_EXTRA_MATCH_WORD | PCRE2_EXTRA_MATCH_WORD | |||
| This option is provided for use by the -w option of pcre2grep. I | This option is provided for use by the -w option of pcre2grep. It ca | |||
| t causes the pattern only to | uses the pattern only to | |||
| match strings that have a word boundary at the start and the end. Th | match strings that have a word boundary at the start and the end. T | |||
| is is achieved by automati‐ | his is achieved by automati‐ | |||
| cally inserting the code for "\b(?:" at the start of the compiled p | cally inserting the code for "\b(?:" at the start of the compiled pa | |||
| attern and ")\b" at the end. | ttern and ")\b" at the end. | |||
| The option may be used with PCRE2_LITERAL. However, it is ignored if | The option may be used with PCRE2_LITERAL. However, it is ignored | |||
| PCRE2_EXTRA_MATCH_LINE is | if PCRE2_EXTRA_MATCH_LINE is | |||
| also set. | also set. | |||
| PCRE2_EXTRA_NO_BS0 | ||||
| If this option is set (note that its final character is the digit 0) | ||||
| it locks out the use of the | ||||
| sequence \0 unless at least one more octal digit follows. | ||||
| PCRE2_EXTRA_PYTHON_OCTAL | ||||
| If this option is set, PCRE2 follows Python's rules for interpreting | ||||
| octal escape sequences. The | ||||
| rules for handling sequences such as \14, which could be an octal nu | ||||
| mber or a back reference are | ||||
| different. Details are given in the pcre2pattern documentation. | ||||
| PCRE2_EXTRA_NEVER_CALLOUT | ||||
| If this option is set, PCRE2 treats callouts in the pattern as | ||||
| a syntax error, returning | ||||
| PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if the applicati | ||||
| on knows that a callout will | ||||
| not be provided to pcre2_match(), so that callouts in the pattern ar | ||||
| e not silently ignored. | ||||
| PCRE2_EXTRA_TURKISH_CASING | ||||
| This option alters case-equivalence of the 'i' letters to follow | ||||
| the alphabet used by Turkish | ||||
| and Azeri languages. The option can be changed within a pattern by t | ||||
| he (*TURKISH_CASING) start- | ||||
| of-pattern setting. Either the UTF or UCP options must be set. In th | ||||
| e 8-bit library, UTF must be | ||||
| set. This option cannot be combined with PCRE2_EXTRA_CASELESS_RESTRI | ||||
| CT. | ||||
| JUST-IN-TIME (JIT) COMPILATION | JUST-IN-TIME (JIT) COMPILATION | |||
| int pcre2_jit_compile(pcre2_code *code, uint32_t options); | int pcre2_jit_compile(pcre2_code *code, uint32_t options); | |||
| int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, | int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, | |||
| PCRE2_SIZE length, PCRE2_SIZE startoffset, | PCRE2_SIZE length, PCRE2_SIZE startoffset, | |||
| uint32_t options, pcre2_match_data *match_data, | uint32_t options, pcre2_match_data *match_data, | |||
| pcre2_match_context *mcontext); | pcre2_match_context *mcontext); | |||
| void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); | void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); | |||
| skipping to change at line 1752 | skipping to change at line 1883 | |||
| When .* is the first significant item, anchoring is possible only wh en all the following are | When .* is the first significant item, anchoring is possible only wh en all the following are | |||
| true: | true: | |||
| .* is not in an atomic group | .* is not in an atomic group | |||
| .* is not in a capture group that is the subject | .* is not in a capture group that is the subject | |||
| of a backreference | of a backreference | |||
| PCRE2_DOTALL is in force for .* | PCRE2_DOTALL is in force for .* | |||
| Neither (*PRUNE) nor (*SKIP) appears in the pattern | Neither (*PRUNE) nor (*SKIP) appears in the pattern | |||
| PCRE2_NO_DOTSTAR_ANCHOR is not set | PCRE2_NO_DOTSTAR_ANCHOR is not set | |||
| Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF | ||||
| For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the options returned for | For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the options returned for | |||
| PCRE2_INFO_ALLOPTIONS. | PCRE2_INFO_ALLOPTIONS. | |||
| PCRE2_INFO_BACKREFMAX | PCRE2_INFO_BACKREFMAX | |||
| Return the number of the highest backreference in the pattern. The t hird argument should point | Return the number of the highest backreference in the pattern. The t hird argument should point | |||
| to a uint32_t variable. Named capture groups acquire numbers as we ll as names, and these count | to a uint32_t variable. Named capture groups acquire numbers as we ll as names, and these count | |||
| towards the highest backreference. Backreferences such as \4 or \g{1 2} match the captured char‐ | towards the highest backreference. Backreferences such as \4 or \g{1 2} match the captured char‐ | |||
| acters of the given group, but in addition, the check that a capt ure group is set in a condi‐ | acters of the given group, but in addition, the check that a capt ure group is set in a condi‐ | |||
| skipping to change at line 2805 | skipping to change at line 2937 | |||
| If the function is not successful, the value set via outlengthptr d epends on the type of error. | If the function is not successful, the value set via outlengthptr d epends on the type of error. | |||
| For syntax errors in the replacement string, the value is the offset in the replacement string | For syntax errors in the replacement string, the value is the offset in the replacement string | |||
| where the error was detected. For other errors, the value is PCRE2_UNSET by default. This in‐ | where the error was detected. For other errors, the value is PCRE2_UNSET by default. This in‐ | |||
| cludes the case of the output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is | cludes the case of the output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is | |||
| set. | set. | |||
| PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the outpu t buffer is too small. The | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the outpu t buffer is too small. The | |||
| default action is to return PCRE2_ERROR_NOMEMORY immediately. If this option is set, however, | default action is to return PCRE2_ERROR_NOMEMORY immediately. If this option is set, however, | |||
| pcre2_substitute() continues to go through the motions of matching a nd substituting (without, of | pcre2_substitute() continues to go through the motions of matching a nd substituting (without, of | |||
| course, writing anything) in order to compute the size of buffer tha | course, writing anything) in order to compute the size of buffer tha | |||
| t is needed. This value is | t is needed, which will in‐ | |||
| passed back via the outlengthptr variable, with the result of the fu | clude the extra space for the terminating NUL. This value is pass | |||
| nction still being PCRE2_ER‐ | ed back via the outlengthptr | |||
| ROR_NOMEMORY. | variable, with the result of the function still being PCRE2_ERROR_NO | |||
| MEMORY. | ||||
| Passing a buffer size of zero is a permitted way of finding out h | Passing a buffer size of zero is a permitted way of finding out how | |||
| ow much memory is needed for | much memory is needed for | |||
| a given substitution. However, this does mean that the entire operatio | a given substitution. However, this does mean that the entire operati | |||
| n is carried out twice. De‐ | on is carried out twice. De‐ | |||
| pending on the application, it may be more efficient to allocate a l arge buffer and free the ex‐ | pending on the application, it may be more efficient to allocate a l arge buffer and free the ex‐ | |||
| cess afterwards, instead of using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. | cess afterwards, instead of using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. | |||
| The replacement string, which is interpreted as a UTF string in UTF mode, is checked for UTF va‐ | The replacement string, which is interpreted as a UTF string in UTF mode, is checked for UTF va‐ | |||
| lidity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF replacemen t string causes an immediate | lidity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF replacement string causes an immediate | |||
| return with the relevant UTF error code. | return with the relevant UTF error code. | |||
| If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in terpreted in any way. By de‐ | If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in terpreted in any way. By de‐ | |||
| fault, however, a dollar character is an escape character that can | fault, however, a dollar character is an escape character that | |||
| specify the insertion of | can specify the insertion of | |||
| characters from capture groups and names from (*MARK) or other c | characters from capture groups and names from (*MARK) or other contr | |||
| ontrol verbs in the pattern. | ol verbs in the pattern. | |||
| Dollar is the only escape character (backslash is treated as literal | Dollar is the only escape character (backslash is treated as liter | |||
| ). The following forms are | al). The following forms are | |||
| always recognized: | recognized: | |||
| $$ insert a dollar character | $$ insert a dollar character | |||
| $<n> or ${<n>} insert the contents of group <n> | $n or ${n} insert the contents of group n | |||
| $0 or $& insert the entire matched substring | ||||
| $` insert the substring that precedes the match | ||||
| $' insert the substring that follows the match | ||||
| $_ insert the entire input string | ||||
| $*MARK or ${*MARK} insert a control verb name | $*MARK or ${*MARK} insert a control verb name | |||
| Either a group number or a group name can be given for <n>. Curly b | Either a group number or a group name can be given for n, for exampl | |||
| rackets are required only if | e $2 or $NAME. Curly brack‐ | |||
| the following character would be interpreted as part of the number o | ets are required only if the following character would be interpre | |||
| r name. The number may be | ted as part of the number or | |||
| zero to include the entire matched string. For example, if the p | name. The number may be zero to include the entire matched string. F | |||
| attern a(b)c is matched with | or example, if the pattern | |||
| "=abc=" and the replacement string "+$1$0$1+", the result is "=+babc | a(b)c is matched with "=abc=" and the replacement string "+$1$0$1+", | |||
| b+=". | the result is "=+babcb+=". | |||
| The JavaScript form $<name>, where the angle brackets are part of th | ||||
| e syntax, is also recognized | ||||
| for group names, but not for group numbers or *MARK. | ||||
| $*MARK inserts the name from the last encountered backtracking contr ol verb on the matching path | $*MARK inserts the name from the last encountered backtracking contr ol verb on the matching path | |||
| that has a name. (*MARK) must always include a name, but the other v erbs need not. For example, | that has a name. (*MARK) must always include a name, but the other verbs need not. For example, | |||
| in the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for ( *MARK:A)(*PRUNE:B) the rele‐ | in the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for ( *MARK:A)(*PRUNE:B) the rele‐ | |||
| vant name is "B". This facility can be used to perform simple si multaneous substitutions, as | vant name is "B". This facility can be used to perform simple simu ltaneous substitutions, as | |||
| this pcre2test example shows: | this pcre2test example shows: | |||
| /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} | /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} | |||
| apple lemon | apple lemon | |||
| 2: pear orange | 2: pear orange | |||
| PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subj | PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the sub | |||
| ect string, replacing every | ject string, replacing every | |||
| matching substring. If this option is not set, only the first mat | matching substring. If this option is not set, only the first matchi | |||
| ching substring is replaced. | ng substring is replaced. | |||
| The search for matches takes place in the original subject string (t | The search for matches takes place in the original subject string | |||
| hat is, previous replace‐ | (that is, previous replace‐ | |||
| ments do not affect it). Iteration is implemented by advancing th | ments do not affect it). Iteration is implemented by advancing the | |||
| e startoffset value for each | startoffset value for each | |||
| search, which is always passed the entire subject string. If an offs et limit is set in the match | search, which is always passed the entire subject string. If an offs et limit is set in the match | |||
| context, searching stops when that limit is reached. | context, searching stops when that limit is reached. | |||
| You can restrict the effect of a global substitution to a portion of the subject string by set‐ | You can restrict the effect of a global substitution to a portion o f the subject string by set‐ | |||
| ting either or both of startoffset and an offset limit. Here is a pc re2test example: | ting either or both of startoffset and an offset limit. Here is a pc re2test example: | |||
| /B/g,replace=!,use_offset_limit | /B/g,replace=!,use_offset_limit | |||
| ABC ABC ABC ABC\=offset=3,offset_limit=12 | ABC ABC ABC ABC\=offset=3,offset_limit=12 | |||
| 2: ABC A!C A!C ABC | 2: ABC A!C A!C ABC | |||
| When continuing with global substitutions after matching a substr ing with zero length, an at‐ | When continuing with global substitutions after matching a substring with zero length, an at‐ | |||
| tempt to find a non-empty match at the same offset is performed. If this is not successful, the | tempt to find a non-empty match at the same offset is performed. If this is not successful, the | |||
| offset is advanced by one character except when CRLF is a valid newl ine sequence and the next | offset is advanced by one character except when CRLF is a valid n ewline sequence and the next | |||
| two characters are CR, LF. In this case, the offset is advanced by t wo characters. | two characters are CR, LF. In this case, the offset is advanced by t wo characters. | |||
| PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture grou | PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups t | |||
| ps that do not appear in the | hat do not appear in the | |||
| pattern to be treated as unset groups. This option should be used wi | pattern to be treated as unset groups. This option should be used | |||
| th care, because it means | with care, because it means | |||
| that a typo in a group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING error. | that a typo in a group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING error. | |||
| PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (incl uding unknown groups when | PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (inclu ding unknown groups when | |||
| PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty string s when inserted as described | PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty string s when inserted as described | |||
| above. If this option is not set, an attempt to insert an unset grou p causes the PCRE2_ERROR_UN‐ | above. If this option is not set, an attempt to insert an unset grou p causes the PCRE2_ERROR_UN‐ | |||
| SET error. This option does not influence the extended substitution syntax described below. | SET error. This option does not influence the extended substitution syntax described below. | |||
| PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to t he replacement string. With‐ | PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to t he replacement string. With‐ | |||
| out this option, only the dollar character is special, and only the group insertion forms listed | out this option, only the dollar character is special, and only the group insertion forms listed | |||
| above are valid. When PCRE2_SUBSTITUTE_EXTENDED is set, two things c hange: | above are valid. When PCRE2_SUBSTITUTE_EXTENDED is set, several thin gs change: | |||
| Firstly, backslash in a replacement string is interpreted as an es | Firstly, backslash in a replacement string is interpreted as an | |||
| cape character. The usual | escape character. The usual | |||
| forms such as \n or \x{ddd} can be used to specify particular ch | forms such as \x{ddd} can be used to specify particular character co | |||
| aracter codes, and backslash | des, and backslash followed | |||
| followed by any non-alphanumeric character quotes that character. Ex | by any non-alphanumeric character quotes that character. Extende | |||
| tended quoting can be coded | d quoting can be coded using | |||
| using \Q...\E, exactly as in pattern strings. | \Q...\E, exactly as in pattern strings. The escapes \b and \v are in | |||
| terpreted as the characters | ||||
| There are also four escape sequences for forcing the case of inse | backspace and vertical tab, respectively. | |||
| rted letters. The insertion | ||||
| mechanism has three states: no case forcing, force upper case, and f | The interpretation of backslash followed by one or more digits | |||
| orce lower case. The escape | is the same as in a pattern, | |||
| sequences change the current state: \U and \L change to upper or | which in Perl has some ambiguities. Details are given in the pcre2pa | |||
| lower case forcing, respec‐ | ttern page. | |||
| tively, and \E (when not terminating a \Q quoted sequence) reverts t | ||||
| o no case forcing. The se‐ | The Python form \g<n>, where the angle brackets are part of the synt | |||
| quences \u and \l force the next character (if it is a letter) to | ax and n is either a group | |||
| upper or lower case, respec‐ | name or number, is recognized as an alternative way of inserting the | |||
| tively, and then the state automatically reverts to no case forcing. | contents of a group, for ex‐ | |||
| Case forcing applies to all | ample \g<3>. | |||
| inserted characters, including those from capture groups and letters | ||||
| within \Q...\E quoted se‐ | There are also four escape sequences for forcing the case of inserte | |||
| quences. If either PCRE2_UTF or PCRE2_UCP was set when the pattern w | d letters. Case forcing ap‐ | |||
| as compiled, Unicode proper‐ | plies to all inserted characters, including those from capture group | |||
| ties are used for case forcing characters whose code points are grea | s and letters within \Q...\E | |||
| ter than 127. | quoted sequences. The insertion mechanism has three states: no case | |||
| forcing, force upper case, | ||||
| and force lower case. The escape sequences change the current stat | ||||
| e: \U and \L change to upper | ||||
| or lower case forcing, respectively, and \E (when not terminating a | ||||
| \Q quoted sequence) reverts | ||||
| to no case forcing. The sequences \u and \l force the next characte | ||||
| r (if it is a letter) to up‐ | ||||
| per or lower case, respectively, and then the state automatically re | ||||
| verts to no case forcing. | ||||
| However, if \u is immediately followed by \L or \l is immediately fo | ||||
| llowed by \U, the next char‐ | ||||
| acter's case is forced by the first escape sequence, and subsequent | ||||
| characters by the second. | ||||
| This provides a "title casing" facility that can be applied to gro | ||||
| up captures. For example, if | ||||
| group 1 has captured "heLLo", the replacement string "\u\L$1" become | ||||
| s "Hello". | ||||
| If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compil | ||||
| ed, Unicode properties are | ||||
| used for case forcing characters whose code points are greater th | ||||
| an 127. However, only simple | ||||
| case folding, as determined by the Unicode file CaseFolding.txt, is | ||||
| supported. PCRE2 does not | ||||
| support language-specific special casing rules such as using diffe | ||||
| rent lower case Greek sigmas | ||||
| in the middle and ends of words (as defined in the Unicode file Spec | ||||
| ialCasing.txt). | ||||
| Note that case forcing sequences such as \U...\E do not nest. For | Note that case forcing sequences such as \U...\E do not nest. For ex | |||
| example, the result of pro‐ | ample, the result of pro‐ | |||
| cessing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no ef | cessing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no e | |||
| fect. Note also that the | ffect. Note also that the | |||
| PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do not apply to repl acement strings. | PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do not apply to repl acement strings. | |||
| The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more flexibility to capture | The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add mo re flexibility to capture | |||
| group substitution. The syntax is similar to that used by Bash: | group substitution. The syntax is similar to that used by Bash: | |||
| ${<n>:-<string>} | ${n:-string} | |||
| ${<n>:+<string1>:<string2>} | ${n:+string1:string2} | |||
| As before, <n> may be a group number or a name. The first form speci | As in the simple case, n may be a group number or a name. The fi | |||
| fies a default value. If | rst form specifies a default | |||
| group <n> is set, its value is inserted; if not, <string> is expan | value. If group n is set, its value is inserted; if not, the string | |||
| ded and the result inserted. | is expanded and the result | |||
| The second form specifies strings that are expanded and inserted whe | inserted. The second form specifies strings that are expanded and | |||
| n group <n> is set or unset, | inserted when group n is set | |||
| respectively. The first form is just a convenient shorthand for | or unset, respectively. The first form is just a convenient shorthan | |||
| d for | ||||
| ${<n>:+${<n>}:<string>} | ${n:+${n}:string} | |||
| Backslash can be used to escape colons and closing curly brackets in the replacement strings. A | Backslash can be used to escape colons and closing curly brackets in the replacement strings. A | |||
| change of the case forcing state within a replacement string rem ains in force afterwards, as | change of the case forcing state within a replacement string rem ains in force afterwards, as | |||
| shown in this pcre2test example: | shown in this pcre2test example: | |||
| /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo | /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo | |||
| body | body | |||
| 1: hello | 1: hello | |||
| somebody | somebody | |||
| 1: HELLO | 1: HELLO | |||
| skipping to change at line 2963 | skipping to change at line 3119 | |||
| Substitution callouts | Substitution callouts | |||
| int pcre2_set_substitute_callout(pcre2_match_context *mcontext, | int pcre2_set_substitute_callout(pcre2_match_context *mcontext, | |||
| int (*callout_function)(pcre2_substitute_callout_block *, void *), | int (*callout_function)(pcre2_substitute_callout_block *, void *), | |||
| void *callout_data); | void *callout_data); | |||
| The pcre2_set_substitute_callout() function can be used to specify a callout function for | The pcre2_set_substitute_callout() function can be used to specify a callout function for | |||
| pcre2_substitute(). This information is passed in a match cont ext. The callout function is | pcre2_substitute(). This information is passed in a match cont ext. The callout function is | |||
| called after each substitution has been processed, but it can cause the replacement not to hap‐ | called after each substitution has been processed, but it can cause the replacement not to hap‐ | |||
| pen. The callout function is not called for simulated substitution | pen. | |||
| s that happen as a result of | ||||
| the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. | The callout function is not called for simulated substitutions th | |||
| at happen as a result of the | ||||
| PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In this mode, when substitu | ||||
| tion processing exceeds the | ||||
| buffer space provided by the caller, processing continues by countin | ||||
| g code units. The simulation | ||||
| is unable to populate the callout block, and so the simulation is pe | ||||
| ssimistic about the required | ||||
| buffer size. Whichever is larger of accepted or rejected substit | ||||
| ution is reported as the re‐ | ||||
| quired size. Therefore, the returned buffer length may be an overest | ||||
| imate (without a substitu‐ | ||||
| tion callout, it is normally an exact measurement). | ||||
| The first argument of the callout function is a pointer to a substit ute callout block structure, | The first argument of the callout function is a pointer to a substit ute callout block structure, | |||
| which contains the following fields, not necessarily in this order: | which contains the following fields, not necessarily in this order: | |||
| uint32_t version; | uint32_t version; | |||
| uint32_t subscount; | uint32_t subscount; | |||
| PCRE2_SPTR input; | PCRE2_SPTR input; | |||
| PCRE2_SPTR output; | PCRE2_SPTR output; | |||
| PCRE2_SIZE *ovector; | PCRE2_SIZE *ovector; | |||
| uint32_t oveccount; | uint32_t oveccount; | |||
| PCRE2_SIZE output_offsets[2]; | PCRE2_SIZE output_offsets[2]; | |||
| The version field contains the version number of the block format. T he current version is 0. The | The version field contains the version number of the block format. T he current version is 0. The | |||
| version number will increase in future if more fields are added, but the intention is never to | version number will increase in future if more fields are added, b ut the intention is never to | |||
| remove any of the existing fields. | remove any of the existing fields. | |||
| The subscount field is the number of the current match. It is 1 for the first callout, 2 for the | The subscount field is the number of the current match. It is 1 for the first callout, 2 for the | |||
| second, and so on. The input and output pointers are copies of the values passed to pcre2_sub‐ | second, and so on. The input and output pointers are copies of the v alues passed to pcre2_sub‐ | |||
| stitute(). | stitute(). | |||
| The ovector field points to the ovector, which contains the result o f the most recent match. The | The ovector field points to the ovector, which contains the result o f the most recent match. The | |||
| oveccount field contains the number of pairs that are set in the ove ctor, and is always greater | oveccount field contains the number of pairs that are set in the ov ector, and is always greater | |||
| than zero. | than zero. | |||
| The output_offsets vector contains the offsets of the replacement in the output string. This has | The output_offsets vector contains the offsets of the replacement in the output string. This has | |||
| already been processed for dollar and (if requested) backslash subst itutions as described above. | already been processed for dollar and (if requested) backslash subst itutions as described above. | |||
| The second argument of the callout function is the value passed as callout_data when the func‐ | The second argument of the callout function is the value passed as c allout_data when the func‐ | |||
| tion was registered. The value returned by the callout function is i nterpreted as follows: | tion was registered. The value returned by the callout function is i nterpreted as follows: | |||
| If the value is zero, the replacement is accepted, and, if PCRE2_SUB | If the value is zero, the replacement is accepted, and, if PCRE2_SU | |||
| STITUTE_GLOBAL is set, pro‐ | BSTITUTE_GLOBAL is set, pro‐ | |||
| cessing continues with a search for the next match. If the value | cessing continues with a search for the next match. If the value is | |||
| is not zero, the current re‐ | not zero, the current re‐ | |||
| placement is not accepted. If the value is greater than zero, | placement is not accepted. If the value is greater than zer | |||
| processing continues when | o, processing continues when | |||
| PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less | PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less t | |||
| than zero or PCRE2_SUBSTI‐ | han zero or PCRE2_SUBSTI‐ | |||
| TUTE_GLOBAL is not set), the rest of the input is copied to the | TUTE_GLOBAL is not set), the rest of the input is copied t | |||
| output and the call to | o the output and the call to | |||
| pcre2_substitute() exits, returning the number of matches so far. | pcre2_substitute() exits, returning the number of matches so far. | |||
| Substitution case callouts | ||||
| int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, | ||||
| PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, | ||||
| PCRE2_UCHAR *, PCRE2_SIZE, | ||||
| int, void *), | ||||
| void *callout_data); | ||||
| The pcre2_set_substitute_case_callout() function can be used to sp | ||||
| ecify a callout function for | ||||
| pcre2_substitute() to use when performing case transformations. This | ||||
| does not affect any case | ||||
| insensitivity behaviour when performing a match, but only the user- | ||||
| visible transformations per‐ | ||||
| formed when processing a substitution such as: | ||||
| pcre2_substitute(..., "\\U$1", ...) | ||||
| The default case transformations applied by PCRE2 are reasonably com | ||||
| plete, and, in UTF or UCP | ||||
| mode, perform the simple locale-invariant case transformations as s | ||||
| pecified by Unicode. This is | ||||
| suitable for the internal (invisible) case-equivalence procedures us | ||||
| ed during pattern matching, | ||||
| but an application may wish to use more sophisticated locale-aware p | ||||
| rocessing for the user-visi‐ | ||||
| ble substitution transformations. | ||||
| One example implementation of the callout_function using the ICU lib | ||||
| rary would be: | ||||
| PCRE2_SIZE | ||||
| icu_case_callout( | ||||
| PCRE2_SPTR input, PCRE2_SIZE input_len, | ||||
| PCRE2_UCHAR *output, PCRE2_SIZE output_cap, | ||||
| int to_case, void *data_ptr) | ||||
| { | ||||
| UErrorCode err = U_ZERO_ERROR; | ||||
| int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER | ||||
| ? u_strToLower(output, output_cap, input, input_len, NULL, & | ||||
| err) | ||||
| : to_case == PCRE2_SUBSTITUTE_CASE_UPPER | ||||
| ? u_strToUpper(output, output_cap, input, input_len, NULL, & | ||||
| err) | ||||
| : u_strToTitle(output, output_cap, input, input_len, &first_ | ||||
| char_only, | ||||
| NULL, &err); | ||||
| if (U_FAILURE(err)) return (~(PCRE2_SIZE)0); | ||||
| return r; | ||||
| } | ||||
| The first and second arguments of the case callout function are the | ||||
| Unicode string to transform and its length. | ||||
| The third and fourth arguments are the output buffer and its capacit | ||||
| y. | ||||
| The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER, PCR | ||||
| E2_SUBSTITUTE_CASE_UPPER, or | ||||
| PCRE2_SUBSTITUTE_CASE_TITLE_FIRST. PCRE2_SUBSTITUTE_CASE_LOWER and | ||||
| PCRE2_SUBSTITUTE_CASE_UPPER | ||||
| are passed to the callout to indicate that the case of the entire c | ||||
| allout input should be case- | ||||
| transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate | ||||
| that only the first charac‐ | ||||
| ter or glyph should be transformed to Unicode titlecase and the rest | ||||
| to Unicode lowercase (note | ||||
| that titlecasing sometimes uses Unicode properties to titlecase each | ||||
| word in a string; but PCRE2 | ||||
| is requesting that only the single leading character is to be titlec | ||||
| ased). | ||||
| The sixth argument is the callout_data supplied to pcre2_set_substit | ||||
| ute_case_callout(). | ||||
| The resulting string in the destination buffer may be larger or sm | ||||
| aller than the input, if the | ||||
| casing rules merge or split characters. The return value is the leng | ||||
| th required for the output | ||||
| string. If a buffer of sufficient size was provided to the call | ||||
| out, then the result must be | ||||
| written to the buffer and the number of code units returned. If the | ||||
| result does not fit in the | ||||
| provided buffer, then the required capacity must be returned and PC | ||||
| RE2 will not make use of the | ||||
| output buffer. PCRE2 provides input and output buffers which overlap | ||||
| , so the callout must sup‐ | ||||
| port this by suitable internal buffering. | ||||
| Alternatively, if the callout wishes to indicate an error, then it | ||||
| may return (~(PCRE2_SIZE)0). | ||||
| In this case pcre2_substitute() will immediately fail with error PCR | ||||
| E2_ERROR_REPLACECASE. | ||||
| When a case callout is combined with the PCRE2_SUBSTITUTE_OVERFLOW_L | ||||
| ENGTH option, there are sit‐ | ||||
| uations when pcre2_substitute() will return an underestimate of the | ||||
| required buffer size. If you | ||||
| call pcre2_substitute() once with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, | ||||
| and the output buffer is too | ||||
| small for the replacement string to be constructed, then instead | ||||
| of calling the case callout, | ||||
| pcre2_substitute() will make an estimate of the required buffer size | ||||
| . The second call should | ||||
| also pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that second ca | ||||
| ll is not guaranteed to suc‐ | ||||
| ceed either, if the case callout requires more buffer space than exp | ||||
| ected. The caller must make | ||||
| repeated attempts in a loop. | ||||
| DUPLICATE CAPTURE GROUP NAMES | DUPLICATE CAPTURE GROUP NAMES | |||
| int pcre2_substring_nametable_scan(const pcre2_code *code, | int pcre2_substring_nametable_scan(const pcre2_code *code, | |||
| PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); | PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); | |||
| When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture groups are not re‐ | When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture groups are not re‐ | |||
| quired to be unique. Duplicate names are always allowed for groups with the same number, created | quired to be unique. Duplicate names are always allowed for groups with the same number, created | |||
| by using the (?| feature. Indeed, if such groups are named, they are required to use the same | by using the (?| feature. Indeed, if such groups are named, they are required to use the same | |||
| names. | names. | |||
| skipping to change at line 3213 | skipping to change at line 3450 | |||
| pcre2sample(3), pcre2unicode(3). | pcre2sample(3), pcre2unicode(3). | |||
| AUTHOR | AUTHOR | |||
| Philip Hazel | Philip Hazel | |||
| Retired from University Computing Service | Retired from University Computing Service | |||
| Cambridge, England. | Cambridge, England. | |||
| REVISION | REVISION | |||
| Last updated: 24 April 2024 | Last updated: 26 December 2024 | |||
| Copyright (c) 1997-2024 University of Cambridge. | Copyright (c) 1997-2024 University of Cambridge. | |||
| PCRE2 10.44 24 April 2024 PCRE2API(3) | PCRE2 10.45-RC1 26 December 2024 PCRE2API(3) | |||
| End of changes. 135 change blocks. | ||||
| 526 lines changed or deleted | 874 lines changed or added | |||
This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ | ||||