Overview
Comment: | When outputting HTML-escaped text and an ampersand is encountered, only transform it to an HTML entity if it looks like it's not one already. HTML entities are left as is, noting that we do only a syntactic check and not a test for whether the entity is a standard/known one. This replaces [aca3d2dc1a00], reverting the doc changes made by that checkin. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
43eb7d998415bf725c6a87f7ad7d25d5 |
User & Date: | stephan 2022-06-14 14:32:16 |
References
2022-06-14
| ||
17:41 | Expanded [43eb7d998415] to permit digits in HTML entities after the first character, to allow for sup2, frac12, and similar entities. Reported in [dcf1418ef16f]. (check-in: a28bee7108 user: stephan tags: trunk) | |
14:37 | • Reply: Invalid SVG for lone Ampersand (artifact: 0477e25d89 user: stephan) | |
Context
2022-06-14
| ||
15:25 | Minor doc correction and code style tweak to the previous checkin. No functional changes. (check-in: b5a5779c8d user: stephan tags: trunk) | |
14:32 | When outputting HTML-escaped text and an ampersand is encountered, only transform it to an HTML entity if it looks like it's not one already. HTML entities are left as is, noting that we do only a syntactic check and not a test for whether the entity is a standard/known one. This replaces [aca3d2dc1a00], reverting the doc changes made by that checkin. (check-in: 43eb7d9984 user: stephan tags: trunk) | |
2022-06-06
| ||
23:51 | Removed some duplicated docs, one copy of which was out of date. (check-in: 1ecd389f95 user: stephan tags: trunk) | |
Changes
Changes to pikchr.c.
︙ | ︙ | |||
4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 | p->zOut = z; p->nOutAlloc = nNew; } memcpy(p->zOut+p->nOut, zText, n); p->nOut += n; p->zOut[p->nOut] = 0; } /* ** Append text to zOut with HTML characters escaped. ** ** * The space character is changed into non-breaking space (U+00a0) ** if mFlags has the 0x01 bit set. This is needed when outputting ** text to preserve leading and trailing whitespace. Turns out we | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 | p->zOut = z; p->nOutAlloc = nNew; } memcpy(p->zOut+p->nOut, zText, n); p->nOut += n; p->zOut[p->nOut] = 0; } /* ** Given a string and its length, returns true if the string begins ** with a construct which syntactically matches an HTML entity escape ** sequence (without checking for whether it's a known entity). Always ** returns false if zText[0] is false or n<4. Entities match the ** equivalent of the regexes `&#[0-9]+;` and `&[a-zA-Z]+;`. */ static int pik_isentity(char const * zText, int n){ int i = 0; if( n<4 || '&'!=zText[0] ) return 0; n--; zText++; if( '#'==zText[0] ){ zText++; n--; for(i=0; i<n; i++){ if( i>1 && ';'==zText[i] ) return 1; else if( zText[i]<'0' || zText[i]>'9' ) return 0; } }else{ for( i=0; i<n; i++ ){ if( i>1 && ';'==zText[i] ) return 1; else if( zText[i]<'A' || zText[i]>'z' || (zText[i]>'Z' && zText[i]<'a') ) return 0; } } return 0; } /* ** Append text to zOut with HTML characters escaped. ** ** * The space character is changed into non-breaking space (U+00a0) ** if mFlags has the 0x01 bit set. This is needed when outputting ** text to preserve leading and trailing whitespace. Turns out we |
︙ | ︙ | |||
4562 4563 4564 4565 4566 4567 4568 | if( c=='&' && bQAmp ) break; } if( i ) pik_append(p, zText, i); if( i==n ) break; switch( c ){ case '<': { pik_append(p, "&lt;", 4); break; } case '>': { pik_append(p, "&gt;", 4); break; } | < > > > | 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 | if( c=='&' && bQAmp ) break; } if( i ) pik_append(p, zText, i); if( i==n ) break; switch( c ){ case '<': { pik_append(p, "&lt;", 4); break; } case '>': { pik_append(p, "&gt;", 4); break; } case ' ': { pik_append(p, "\302\240;", 2); break; } case '&': if( pik_isentity(zText+i, n-i) ){ pik_append(p, "&", 1); } else { pik_append(p, "&amp;", 5); } } i++; n -= i; zText += i; i = 0; } } |
︙ | ︙ | |||
8094 8095 8096 8097 8098 8099 8100 | return TCL_OK; } #endif /* PIKCHR_TCL */ | | | 8125 8126 8127 8128 8129 8130 8131 8132 | return TCL_OK; } #endif /* PIKCHR_TCL */ #line 8157 "pikchr.c" |
Changes to pikchr.y.
︙ | ︙ | |||
1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 | p->zOut = z; p->nOutAlloc = nNew; } memcpy(p->zOut+p->nOut, zText, n); p->nOut += n; p->zOut[p->nOut] = 0; } /* ** Append text to zOut with HTML characters escaped. ** ** * The space character is changed into non-breaking space (U+00a0) ** if mFlags has the 0x01 bit set. This is needed when outputting ** text to preserve leading and trailing whitespace. Turns out we | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 | p->zOut = z; p->nOutAlloc = nNew; } memcpy(p->zOut+p->nOut, zText, n); p->nOut += n; p->zOut[p->nOut] = 0; } /* ** Given a string and its length, returns true if the string begins ** with a construct which syntactically matches an HTML entity escape ** sequence (without checking for whether it's a known entity). Always ** returns false if zText[0] is false or n<4. Entities match the ** equivalent of the regexes `&#[0-9]+;` and `&[a-zA-Z]+;`. */ static int pik_isentity(char const * zText, int n){ int i = 0; if( n<4 || '&'!=zText[0] ) return 0; n--; zText++; if( '#'==zText[0] ){ zText++; n--; for(i=0; i<n; i++){ if( i>1 && ';'==zText[i] ) return 1; else if( zText[i]<'0' || zText[i]>'9' ) return 0; } }else{ for( i=0; i<n; i++ ){ if( i>1 && ';'==zText[i] ) return 1; else if( zText[i]<'A' || zText[i]>'z' || (zText[i]>'Z' && zText[i]<'a') ) return 0; } } return 0; } /* ** Append text to zOut with HTML characters escaped. ** ** * The space character is changed into non-breaking space (U+00a0) ** if mFlags has the 0x01 bit set. This is needed when outputting ** text to preserve leading and trailing whitespace. Turns out we |
︙ | ︙ | |||
1954 1955 1956 1957 1958 1959 1960 | if( c=='&' && bQAmp ) break; } if( i ) pik_append(p, zText, i); if( i==n ) break; switch( c ){ case '<': { pik_append(p, "&lt;", 4); break; } case '>': { pik_append(p, "&gt;", 4); break; } | < > > > | 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 | if( c=='&' && bQAmp ) break; } if( i ) pik_append(p, zText, i); if( i==n ) break; switch( c ){ case '<': { pik_append(p, "&lt;", 4); break; } case '>': { pik_append(p, "&gt;", 4); break; } case ' ': { pik_append(p, "\302\240;", 2); break; } case '&': if( pik_isentity(zText+i, n-i) ){ pik_append(p, "&", 1); } else { pik_append(p, "&amp;", 5); } } i++; n -= i; zText += i; i = 0; } } |
︙ | ︙ |