From 25b7dc7802c5637118b3b34a109c8e918ee64451 Mon Sep 17 00:00:00 2001 From: wangliming Date: Wed, 4 Jul 2018 14:58:03 +0800 Subject: [PATCH] =?UTF-8?q?=E5=85=BC=E5=AE=B9PHP7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- _mbsupport.h | 7 +- badwords.c | 58 +++++---- badwords.h | 4 +- compiler.c | 15 ++- config.m4 | 20 +++- example/example1.php | 16 ++- example/example2.php | 91 ++++++++++++++ example/words.php | 4 + php57_include.h | 36 ++++++ php5_include.h | 56 +++++++++ php7_include.h | 95 +++++++++++++++ php_badwords.c | 278 +++++++++++++++++++++++++++++++++++-------- php_badwords.h | 21 +++- 13 files changed, 601 insertions(+), 100 deletions(-) create mode 100644 example/example2.php create mode 100644 example/words.php create mode 100644 php57_include.h create mode 100644 php5_include.h create mode 100644 php7_include.h diff --git a/_mbsupport.h b/_mbsupport.h index 6f0230c..0c24cfd 100644 --- a/_mbsupport.h +++ b/_mbsupport.h @@ -15,6 +15,12 @@ #ifndef ___MBSUPPORT_H__ #define ___MBSUPPORT_H__ +#include +#include +#include +#include +#include // C99 + #define BW_ENC_UTF8 0 #define BW_ENC_GBK 1 @@ -85,4 +91,3 @@ bw_mb_strtolower(uint8_t *str, uint8_t *end, int encoding) } #endif /* ___MBSUPPORT_H__ */ - diff --git a/badwords.c b/badwords.c index 85eb17c..310e88c 100644 --- a/badwords.c +++ b/badwords.c @@ -16,17 +16,15 @@ #include "config.h" #endif -#include "php.h" #include #include #include #include -#include "ext/standard/php_string.h" -#include "ext/standard/php_var.h" -#include "ext/standard/php_smart_str.h" -#include "_mbsupport.h" +#include "php57_include.h" + #include "badwords.h" +#include "_mbsupport.h" static void bw_match_text(struct bw_trie_header_t *header, zval *return_value, @@ -34,9 +32,9 @@ bw_match_text(struct bw_trie_header_t *header, zval *return_value, { struct bw_node_t *root = (struct bw_node_t *)(header+1), *node, *gotnode; uint8_t *end = text_to_walk + c, *watch, *gotwatch; - uint8_t *_Rep_base = (uint8_t *)(root + header->node_count); + // uint8_t *_Rep_base = (uint8_t *)(root + header->node_count); - smart_str result = {0}; + COM57_SMART_STRING_T result = {0}; while (text_to_walk < end) { /* MATCH */ @@ -44,7 +42,8 @@ bw_match_text(struct bw_trie_header_t *header, zval *return_value, watch = text_to_walk; node = root; gotnode = NULL; - + gotwatch = NULL; + do { node = root + node->next[*watch]; if (node->is_fragment) { @@ -63,8 +62,8 @@ bw_match_text(struct bw_trie_header_t *header, zval *return_value, } while (watch < end && node->next[*watch] != 0); - if (gotnode) { - smart_str_appendl(&result, text, (gotwatch - text_to_walk + 1)); + if (gotnode && gotwatch) { + COM57_SMART_STRING_APPENDL(&result, text, (gotwatch - text_to_walk + 1)); break; } } @@ -74,19 +73,22 @@ bw_match_text(struct bw_trie_header_t *header, zval *return_value, text_to_walk = watch; } - smart_str_0(&result); + COM57_SMART_STRING_0(&result); if (result.len) { - RETURN_STRINGL(result.c, result.len, 0); + // 统一PHP 5/7 接口(PHP 5有折损) + // COM57_RETURN_STRINGL(result.c, result.len, 0); + COM57_RETVAL_STRINGL(result.c, result.len, 1); + COM57_SMART_STRING_FREE(&result); } else { RETURN_EMPTY_STRING(); } } void -bw_trie_match(zval *trie, zval *return_value, uint8_t *text, int c) +bw_trie_match(char *trie, zval *return_value, uint8_t *text, int c) { - struct bw_trie_header_t *header = (struct bw_trie_header_t *) Z_STRVAL_P(trie); + struct bw_trie_header_t *header = (struct bw_trie_header_t *)trie; if (header->magic_num != BW_TRIE_MAGIC || header->version != BW_TRIE_VERSION) RETURN_FALSE; @@ -95,7 +97,7 @@ bw_trie_match(zval *trie, zval *return_value, uint8_t *text, int c) bw_match_text(header, return_value, text, text, c); } else { - uint8_t *text_to_walk = estrndup(text, c); + uint8_t *text_to_walk = (uint8_t *)estrndup((char *)text, c); if (!text_to_walk) { RETURN_FALSE; } else { @@ -114,7 +116,7 @@ bw_replace_text(struct bw_trie_header_t *header, zval *return_value, uint8_t *end = text_to_walk + c, *watch, *gotwatch; uint8_t *_Rep_base = (uint8_t *)(root + header->node_count); - smart_str result = {0}; + COM57_SMART_STRING_T result = {0}; while (text_to_walk < end) { /* REPLACE */ @@ -122,7 +124,8 @@ bw_replace_text(struct bw_trie_header_t *header, zval *return_value, watch = text_to_walk; node = root; gotnode = NULL; - + gotwatch = NULL; + do { node = root + node->next[*watch]; if (node->is_fragment) { @@ -141,9 +144,9 @@ bw_replace_text(struct bw_trie_header_t *header, zval *return_value, } while (watch < end && node->next[*watch] != 0); - if (gotnode) { + if (gotnode && gotwatch) { struct bw_string_t *replace = (struct bw_string_t *)(_Rep_base + gotnode->replace); - smart_str_appendl(&result, replace->byte, replace->len); + COM57_SMART_STRING_APPENDL(&result, replace->byte, replace->len); text += gotwatch - text_to_walk + 1; text_to_walk = gotwatch + 1; continue; @@ -151,25 +154,28 @@ bw_replace_text(struct bw_trie_header_t *header, zval *return_value, } watch = bw_mb_skip_char(text_to_walk, end, header->trie_encoding); - smart_str_appendl(&result, text, watch-text_to_walk); + COM57_SMART_STRING_APPENDL(&result, text, watch-text_to_walk); text += watch - text_to_walk; text_to_walk = watch; } - smart_str_0(&result); + COM57_SMART_STRING_0(&result); if (result.len) { - RETURN_STRINGL(result.c, result.len, 0); + // 统一PHP 5/7 接口(PHP 5有折损) + // COM57_RETURN_STRINGL(result.c, result.len, 0); + COM57_RETVAL_STRINGL(result.c, result.len, 1); + COM57_SMART_STRING_FREE(&result); } else { RETURN_EMPTY_STRING(); } } void -bw_trie_replace(zval *trie, zval *return_value, uint8_t *text, int c) +bw_trie_replace(char *trie, zval *return_value, uint8_t *text, int c) { - struct bw_trie_header_t *header = (struct bw_trie_header_t *) Z_STRVAL_P(trie); - + struct bw_trie_header_t *header = (struct bw_trie_header_t *)trie; + if (header->magic_num != BW_TRIE_MAGIC || header->version != BW_TRIE_VERSION) RETURN_FALSE; @@ -177,7 +183,7 @@ bw_trie_replace(zval *trie, zval *return_value, uint8_t *text, int c) bw_replace_text(header, return_value, text, text, c); } else { - uint8_t *text_to_walk = estrndup(text, c); + uint8_t *text_to_walk = (uint8_t *)estrndup((char *)text, c); if (!text_to_walk) { RETURN_FALSE; } else { diff --git a/badwords.h b/badwords.h index ccc0491..5e2089a 100644 --- a/badwords.h +++ b/badwords.h @@ -58,7 +58,7 @@ struct bw_node_t { }; }; -void bw_trie_match(zval *trie, zval *return_value, uint8_t *text, int c); -void bw_trie_replace(zval *trie, zval *return_value, uint8_t *text, int c); +void bw_trie_match(char *trie, zval *return_value, uint8_t *text, int c); +void bw_trie_replace(char *trie, zval *return_value, uint8_t *text, int c); #endif /* __BADWORDS_H_ */ diff --git a/compiler.c b/compiler.c index 65df202..e6d6476 100644 --- a/compiler.c +++ b/compiler.c @@ -16,14 +16,13 @@ #include "config.h" #endif -#include "php.h" #include #include #include #include -#include "ext/standard/php_string.h" -#include "ext/standard/php_var.h" +#include "php57_include.h" + #include "_mbsupport.h" #include "compiler.h" @@ -184,6 +183,7 @@ bw_trie_compiler_compile(struct bw_trie_compiler_t *compiler, zval *return_value uint32_t rlen = compiler->replace_len; uint32_t tlen = hlen + nlen + rlen; + // zend malloc uint8_t *trie = emalloc(tlen); if (trie) { @@ -197,10 +197,13 @@ bw_trie_compiler_compile(struct bw_trie_compiler_t *compiler, zval *return_value memcpy(trie+hlen, compiler->nodes, nlen); memcpy(trie+hlen+nlen, compiler->replaces, rlen); - RETURN_STRINGL(trie, tlen, 0); + // 统一PHP 5/7 接口(PHP 5有折损) + // COM57_RETURN_STRINGL((char *)trie, tlen, 0); + COM57_RETVAL_STRINGL((char *)trie, tlen, 1); + efree(trie); + } else { + RETURN_FALSE; } - - RETURN_FALSE; } void bw_trie_compiler_free(struct bw_trie_compiler_t *compiler) diff --git a/config.m4 b/config.m4 index 58f31ec..9d5ac92 100644 --- a/config.m4 +++ b/config.m4 @@ -1,19 +1,29 @@ dnl -dnl $Id: config9.m4 2011-08-09 15:48:23Z Wang Wenlin $ +dnl $Id: config9.m4 2011-08-09 15:48:23Z Wang Wenlin/2018-04-11 18:19:23Z wlmwang $ dnl PHP_ARG_ENABLE(badwords, whether to enable badwords support, [ --enable-badwords Enable badwords support]) if test "$PHP_BADWORDS" != "no"; then + + AC_DEFINE(HAVE_BADWORDS,1,[Whether you want badwords support]) - AC_DEFINE(HAVE_BADWORDS,1,[Whether you want badwords support]) +dnl PHP-7.*.* +dnl PHP_NEW_EXTENSION(badwords, badwords.c compiler.c php_badwords.c, $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1) + PHP_NEW_EXTENSION(badwords, badwords.c compiler.c php_badwords.c, $ext_shared) dnl this is needed to build the extension with phpize and -Wall - if test "$PHP_DEBUG" = "yes"; then - CFLAGS="$CFLAGS -Wall" - fi +dnl if test "$PHP_DEBUG" = "yes"; then +dnl CFLAGS="$CFLAGS -Wall" +dnl fi + +if test -z "$PHP_DEBUG"; then + AC_ARG_ENABLE(debug, + [ --enable-debug compile with debugging system], + [ PHP_DEBUG=$enableval], [PHP_DEBUG=no]) +fi fi diff --git a/example/example1.php b/example/example1.php index fc10cec..4d5e5ec 100644 --- a/example/example1.php +++ b/example/example1.php @@ -24,16 +24,16 @@ function get_shared_badwords__() { return $badwords; } - $wordfile = D_P.'data/cache/words.php'; - $triebin = '/tmp/com.foo.bar-words.bin'; - $persistkey = 'badwords::com.foo.bar::words'; + $wordfile = D_P.'words.php'; + $triebin = D_P.'com.foo.bar-words.bin'; + $persistkey = 'badwords::com.foo.bar1::words'; $wmtime = filemtime($wordfile); $tmtime = filemtime($triebin); if ($tmtime === FALSE || $tmtime !== $wmtime && mt_rand(0, 99) < 5) { include($wordfile); - $compiler = badwords_compiler_create(BADWORDS_ENCODING_GBK, True); + $compiler = badwords_compiler_create(BADWORDS_ENCODING_UTF8, True); badwords_compiler_append($compiler, $replace); unset($replace); @@ -45,6 +45,7 @@ function get_shared_badwords__() { file_put_contents($triebin_tmp, $trie); touch($triebin_tmp, $wmtime); rename($triebin_tmp, $triebin); + chmod($triebin, 0755); unset($trie); } } @@ -87,5 +88,8 @@ function do_match($message) return badwords_match($badwords, $message); } -$message = do_replace($message); -$xxword = do_match($message); +$message = "近日特朗普将携希拉里一同访问中国,北京欢迎你!520~"; +$rlword = do_replace($message); +$mtword = do_match($message); + +var_dump($rlword, $mtword); diff --git a/example/example2.php b/example/example2.php new file mode 100644 index 0000000..8ef6c9f --- /dev/null +++ b/example/example2.php @@ -0,0 +1,91 @@ +$to) { + if (stripos($message, (string)$from) !== FALSE) { + return $from; + } + } + return ''; + } + + /* -- USE BADWORDS EXTENSION -- */ + $badwords = get_shared_badwords__(); + return badwords_match($badwords, $message); +} + +for ($i=0; $i < 1000000; $i++) { + $message = "近日特朗普将携希拉里一同访问中国,北京欢迎你!520~"; + $rlword = do_replace($message); + $mtword = do_match($message); + // var_dump($rlword, $mtword); +} +var_dump($rlword, $mtword); diff --git a/example/words.php b/example/words.php new file mode 100644 index 0000000..752f30a --- /dev/null +++ b/example/words.php @@ -0,0 +1,4 @@ +'*河蟹*','希拉里'=>'*河蟹*', 520 => '*5*2*0', 213 => 233); + +?> \ No newline at end of file diff --git a/php57_include.h b/php57_include.h new file mode 100644 index 0000000..ae2b11e --- /dev/null +++ b/php57_include.h @@ -0,0 +1,36 @@ +/** Copyright 2011 HoopCHINA, Co., Ltd. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef _PHP57_INCLUDE_H_ +#define _PHP57_INCLUDE_H_ + +// see doc. https://wiki.php.net/phpng-upgrading + +#include +#include +#include + +#include +#include +#include + +#if PHP_MAJOR_VERSION < 7 +#include +#include "php5_include.h" +#else +#include +#include "php7_include.h" +#endif + +#endif // _PHP57_INCLUDE_H_ \ No newline at end of file diff --git a/php5_include.h b/php5_include.h new file mode 100644 index 0000000..70ae25a --- /dev/null +++ b/php5_include.h @@ -0,0 +1,56 @@ +/** Copyright 2011 HoopCHINA, Co., Ltd. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef _PHP5_INCLUDE_H_ +#define _PHP5_INCLUDE_H_ + +#define COM57_Z_STRVAL_PP Z_STRVAL_PP +#define COM57_Z_STRLEN_PP Z_STRLEN_PP +#define COM57_Z_LVAL_PP Z_LVAL_PP +#define COM57_Z_TYPE_PP Z_TYPE_PP +#define COM57_Z_ISREF_PP Z_ISREF_PP + +// 字符串宏定义 +#define COM57_RETURN_STRINGL RETURN_STRINGL +#define COM57_RETURN_STRING RETURN_STRING +#define COM57_ZVAL_STRINGL ZVAL_STRINGL +#define COM57_ZVAL_STRING ZVAL_STRING +#define COM57_RETVAL_STRINGL RETVAL_STRINGL +#define COM57_RETVAL_STRING RETVAL_STRING + +// smart_str 字符串相关操作(仅定义项目使用) +#define COM57_SMART_STRING_T smart_str +#define COM57_SMART_STRING_APPENDL smart_str_appendl +#define COM57_SMART_STRING_0 smart_str_0 +#define COM57_SMART_STRING_FREE smart_str_free + +#define COM57_ZEND_RESURCE_T zend_rsrc_list_entry + +// resource register/fetch +#define COM57_ZEND_REGISTER_RESOURCE ZEND_REGISTER_RESOURCE + +#define COM57_ZEND_FETCH_RESOURCE ZEND_FETCH_RESOURCE + +// php7结构 zend_string +#define COM57_STRING_INIT +#define COM57_STRING_RELEASE + +// hash +#define COM57_ZEND_HASH_DEL(ht, key, len) \ + zend_hash_del(ht, key, len) + +#define COM57_ZEND_HASH_UPDATE(ht, key, len, zv, zlen, dest_ptr) \ + zend_hash_update(ht, key, len, (void *)zv, zlen, dest_ptr) + +#endif // _PHP5_INCLUDE_H_ diff --git a/php7_include.h b/php7_include.h new file mode 100644 index 0000000..c2fd80a --- /dev/null +++ b/php7_include.h @@ -0,0 +1,95 @@ +/** Copyright 2011 HoopCHINA, Co., Ltd. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef _PHP7_INCLUDE_H_ +#define _PHP7_INCLUDE_H_ + +#include +#ifndef INT64_MAX +#define INT64_MAX INT64_C( 9223372036854775807) +#endif +#ifndef INT64_MIN +#define INT64_MIN (-INT64_C( 9223372036854775807)-1) +#endif + +// 可以使用 zval_get_long(zval), zval_get_double(zval), zval_get_string(zval)等 +// 函数获取 zval 的值,这样不会改变原始的 zval + +#define COM57_Z_STRVAL_PP(zv_pp) Z_STRVAL_P(*zv_pp) +#define COM57_Z_STRLEN_PP(zv_pp) Z_STRLEN_P(*zv_pp) +#define COM57_Z_LVAL_PP(zv_pp) Z_LVAL_P(*zv_pp) +#define COM57_Z_TYPE_PP(zv_pp) Z_TYPE_P(*zv_pp) +#define COM57_Z_ISREF_PP(zv_pp) Z_ISREF_P(*zv_pp) + +// 字符串宏定义 +#define COM57_RETURN_STRINGL(s, l, dup) RETURN_STRINGL(s, l) +#define COM57_RETURN_STRING(s, dup) RETURN_STRING(s) +#define COM57_ZVAL_STRINGL(z, s, l, dup) ZVAL_STRINGL(z, s, l) +#define COM57_ZVAL_STRING(z, s, dup) ZVAL_STRING(z, s) +#define COM57_RETVAL_STRINGL(s, l, dup) RETVAL_STRINGL(s, l) +#define COM57_RETVAL_STRING(s, dup) RETVAL_STRING(s) + +// smart_string 字符串相关操作(仅定义项目使用) +#define COM57_SMART_STRING_T smart_string +#define COM57_SMART_STRING_APPENDL(dest, src, len) smart_string_appendl(dest, src, len) +#define COM57_SMART_STRING_0(s) smart_string_0(s) +#define COM57_SMART_STRING_FREE(s) smart_string_free(s) + +// php7 zend_rsrc_list_entry should be replaced by zend_resource +#define COM57_ZEND_RESURCE_T zend_resource + +// resource register/fetch +#define COM57_ZEND_REGISTER_RESOURCE(return_value, rsrc_ptr, rsrc_type) \ + RETURN_RES(zend_register_resource(rsrc_ptr, rsrc_type)) + +#define COM57_ZEND_FETCH_RESOURCE(return_value, return_type, zv, i, rsrc_ptr, rsrc_type) \ + return_value = (return_type)zend_fetch_resource(Z_RES_P(*zv), rsrc_ptr, rsrc_type); + +// zval* 获取 zend_string* +// zend_string* = Z_STR_P(zval*) + +// zend_string* 获取 char* +// char* = ZSTR_VAL(zend_string*) + +// zend_string* 获取 len +// char* = ZSTR_LEN(zend_string*) + +// char* to zend_string* | release zend_string* +#define COM57_STRING_INIT(c_string) zend_string_init(c_string, strlen(c_string), 0) +#define COM57_STRING_RELEASE(z_string) zend_string_release(z_string) + +// hash +#define COM57_ZEND_HASH_DEL(ht, key, len) \ +({ \ + int ret = FAILURE; \ + zend_string *zstr = zend_string_init(key, strlen(key), 0); \ + if (!(zend_hash_del(ht, zstr))) { \ + ret = SUCCESS; \ + } \ + zend_string_release(zstr); \ + ret; \ +}) + +#define COM57_ZEND_HASH_UPDATE(ht, key, len, zv, zlen, dest_ptr) \ +({ \ + int ret = FAILURE; \ + zend_string *zstr = zend_string_init(key, strlen(key), 0); \ + if (!(zend_hash_update(ht, zstr, zv))) { \ + ret = SUCCESS; \ + } \ + zend_string_release(zstr); \ + ret; \ +}) + +#endif // _PHP7_INCLUDE_H_ diff --git a/php_badwords.c b/php_badwords.c index 051d931..5d4e3ef 100644 --- a/php_badwords.c +++ b/php_badwords.c @@ -16,12 +16,6 @@ #include "config.h" #endif -#include "php.h" -#include "ext/standard/php_string.h" -#include "ext/standard/php_var.h" -#include "ext/standard/php_smart_str.h" -#include "ext/standard/info.h" - #include #include #include @@ -29,8 +23,8 @@ #include #include -#include "php_badwords.h" #include "_mbsupport.h" +#include "php_badwords.h" /* True global resources - no need for thread safety here */ static int le_badwords_compiler, le_badwords_trie; @@ -47,41 +41,64 @@ zend_function_entry badwords_functions[] = { PHP_FE(badwords_match, NULL) PHP_FE(badwords_replace, NULL) PHP_FE(badwords_version, NULL) - {NULL, NULL, NULL} /* Must be the last line in badwords_functions[] */ + + // {NULL, NULL, NULL} /* Must be the last line in badwords_functions[] */ + PHP_FE_END }; /* }}} */ /* {{{ badwords_module_entry */ zend_module_entry badwords_module_entry = { -#if ZEND_MODULE_API_NO >= 20010901 +#if PHP_MAJOR_VERSION < 7 +# if ZEND_MODULE_API_NO >= 20010901 + STANDARD_MODULE_HEADER, +# endif +#else STANDARD_MODULE_HEADER, #endif "badwords", badwords_functions, PHP_MINIT(badwords), PHP_MSHUTDOWN(badwords), - NULL, - NULL, + PHP_RINIT(badwords), + PHP_RSHUTDOWN(badwords), PHP_MINFO(badwords), -#if ZEND_MODULE_API_NO >= 20010901 +#if PHP_MAJOR_VERSION < 7 +# if ZEND_MODULE_API_NO >= 20010901 + PHP_BADWORDS_VERSION, +# endif +#else PHP_BADWORDS_VERSION, #endif STANDARD_MODULE_PROPERTIES }; /* }}} */ -#ifdef COMPILE_DL_BADWORDS -ZEND_GET_MODULE(badwords) +// #ifdef COMPILE_DL_BADWORDS +// ZEND_GET_MODULE(badwords) +// #endif + +#if PHP_MAJOR_VERSION < 7 +# ifdef COMPILE_DL_BADWORDS + ZEND_GET_MODULE(badwords) +# endif +#else +# ifdef COMPILE_DL_BADWORDS +# ifdef ZTS + ZEND_TSRMLS_CACHE_DEFINE() +# endif + ZEND_GET_MODULE(badwords) +# endif #endif -static void php_badwords_compiler_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC) +static void php_badwords_compiler_dtor(COM57_ZEND_RESURCE_T *rsrc TSRMLS_DC) { struct bw_trie_compiler_t *compiler = (struct bw_trie_compiler_t *) rsrc->ptr; bw_trie_compiler_free(compiler); } -static void php_badwords_trie_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC) +static void php_badwords_trie_dtor(COM57_ZEND_RESURCE_T *rsrc TSRMLS_DC) { struct bw_trie_mmap_t *mmi = (struct bw_trie_mmap_t *) rsrc->ptr; if (mmi && --mmi->refcount <= 0) { @@ -90,6 +107,27 @@ static void php_badwords_trie_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC) } } +/* {{{ PHP_RINIT_FUNCTION + */ +PHP_RINIT_FUNCTION(badwords) +{ +#if PHP_MAJOR_VERSION >= 7 +# if defined(COMPILE_DL_BADWORDS) && defined(ZTS) + ZEND_TSRMLS_CACHE_UPDATE(); +# endif +#endif + return SUCCESS; +} +/* }}} */ + +/* {{{ PHP_RSHUTDOWN_FUNCTION + */ +PHP_RSHUTDOWN_FUNCTION(badwords) +{ + return SUCCESS; +} +/* }}} */ + /* {{{ PHP_MINIT_FUNCTION */ PHP_MINIT_FUNCTION(badwords) @@ -119,6 +157,9 @@ PHP_MINFO_FUNCTION(badwords) php_info_print_table_start(); php_info_print_table_header(2, "badwords support", "enabled"); php_info_print_table_row(2, "extension version", PHP_BADWORDS_VERSION); +#if PHP_MAJOR_VERSION >= 7 + php_info_print_table_row(2, "badwords copyright", "Copyright (c) 2018 Hupu Inc. All Rights Reserved."); +#endif php_info_print_table_end(); } /* }}} */ @@ -128,16 +169,18 @@ PHP_MINFO_FUNCTION(badwords) PHP_FUNCTION(badwords_compiler_create) { struct bw_trie_compiler_t *compiler; - int trie_encoding = BW_ENC_UTF8; + long trie_encoding = BW_ENC_UTF8; zend_bool case_sensitive = 0; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|lb", &trie_encoding, &case_sensitive) == FAILURE) { return; } - compiler = bw_trie_compiler_create(trie_encoding, case_sensitive); - - ZEND_REGISTER_RESOURCE(return_value, compiler, le_badwords_compiler); + if (!(compiler = bw_trie_compiler_create(trie_encoding, case_sensitive))) { + return; + } + + COM57_ZEND_REGISTER_RESOURCE(return_value, compiler, le_badwords_compiler); } /* }}} */ @@ -148,12 +191,13 @@ PHP_FUNCTION(badwords_compiler_append) { struct bw_trie_compiler_t *compiler; zval *zcompiler; - zval **from; char *to = NULL; - int to_len = 0; int ac = ZEND_NUM_ARGS(); long added, total_added = 0; +#if PHP_MAJOR_VERSION < 7 + zval **from; + int to_len = 0; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rZ|s", &zcompiler, &from, &to, &to_len) == FAILURE) { return; } @@ -165,9 +209,11 @@ PHP_FUNCTION(badwords_compiler_append) ZEND_FETCH_RESOURCE(compiler, struct bw_trie_compiler_t *, &zcompiler, -1, PHP_BADWORDS_COMPILER_RES_NAME, le_badwords_compiler); + // 字符串 append if (Z_TYPE_PP(from) != IS_ARRAY) { convert_to_string_ex(from); - added = bw_trie_compiler_add_word(compiler, Z_STRVAL_PP(from), Z_STRLEN_PP(from), to, to_len); + added = bw_trie_compiler_add_word(compiler, (uint8_t *)Z_STRVAL_PP(from), Z_STRLEN_PP(from), (uint8_t *)to, to_len); + if (added >= 0) { RETURN_LONG(added); } else { @@ -177,6 +223,38 @@ PHP_FUNCTION(badwords_compiler_append) /* HASH */ HashTable *hash = HASH_OF(*from); +#else + zval *from; + size_t to_len = 0; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rz|s", &zcompiler, &from, &to, &to_len) == FAILURE) { + return; + } + + if (ac == 2 && Z_TYPE_P(from) != IS_ARRAY) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "The second argument is not an array when only 2 arguments"); + RETURN_FALSE; + } + + compiler = (struct bw_trie_compiler_t *)zend_fetch_resource(Z_RES_P(zcompiler), PHP_BADWORDS_COMPILER_RES_NAME, le_badwords_compiler); + + // 字符串 append + if (Z_TYPE_P(from) != IS_ARRAY) { + convert_to_string_ex(from); + added = bw_trie_compiler_add_word(compiler, (uint8_t *)Z_STRVAL_P(from), Z_STRLEN_P(from), (uint8_t *)to, to_len); + + if (added >= 0) { + RETURN_LONG(added); + } else { + RETURN_FALSE; + } + } + + /* HASH */ + HashTable *hash = HASH_OF(from); +#endif + + +#if PHP_MAJOR_VERSION < 7 HashPosition hpos; zval **entry; int key_len, elen; @@ -186,10 +264,9 @@ PHP_FUNCTION(badwords_compiler_append) zval ktmp, etmp; zend_hash_internal_pointer_reset_ex(hash, &hpos); - while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) { /* KEY */ - keytype = zend_hash_get_current_key_ex(hash, &key, &key_len, &num_key, 0, &hpos); + keytype = zend_hash_get_current_key_ex(hash, &key, &key_len, (zend_ulong *)&num_key, 0, &hpos); if (keytype == HASH_KEY_IS_LONG) { ZVAL_LONG(&ktmp, num_key); @@ -213,7 +290,7 @@ PHP_FUNCTION(badwords_compiler_append) } /* ADD... */ - added = bw_trie_compiler_add_word(compiler, key, key_len, eval, elen); + added = bw_trie_compiler_add_word(compiler, (uint8_t *)key, key_len, (uint8_t *)eval, elen); if (Z_TYPE_PP(entry) != IS_STRING) zval_dtor(&etmp); @@ -230,7 +307,54 @@ PHP_FUNCTION(badwords_compiler_append) zend_hash_move_forward_ex(hash, &hpos); } - +#else + HashPosition hpos; + zval *entry; + zend_string *key, *eval; + zend_ulong num_key; + zval ktmp, etmp; + int keytype; + + zend_hash_internal_pointer_reset_ex(hash, &hpos); + while ((entry = zend_hash_get_current_data_ex(hash, &hpos)) != SUCCESS) { + /* KEY */ + keytype = zend_hash_get_current_key_ex(hash, &key, &num_key, &hpos); + + if (keytype == HASH_KEY_IS_LONG) { + ZVAL_LONG(&ktmp, num_key); + convert_to_string(&ktmp); + key = Z_STR(ktmp); + } + + /* VALUE */ + if (Z_TYPE_P(entry) != IS_STRING) { + etmp = *entry; + zval_copy_ctor(&etmp); + convert_to_string(&etmp); + eval = Z_STR(etmp); + } else { + eval = Z_STR_P(entry); + } + + /* ADD... */ + added = bw_trie_compiler_add_word(compiler, (uint8_t *)ZSTR_VAL(key), ZSTR_LEN(key), (uint8_t *)ZSTR_VAL(eval), ZSTR_LEN(eval)); + + if (Z_TYPE_P(entry) != IS_STRING) + zval_dtor(&etmp); + if (keytype == HASH_KEY_IS_LONG) + zval_dtor(&ktmp); + + /* CHECK... */ + if (added > 0) + total_added += added; + /* + if (added < 0) + break; + */ + + zend_hash_move_forward_ex(hash, &hpos); + } +#endif RETURN_LONG(total_added); } /* }}} */ @@ -246,8 +370,8 @@ PHP_FUNCTION(badwords_compiler_compile) return; } - ZEND_FETCH_RESOURCE(compiler, struct bw_trie_compiler_t *, &zcompiler, -1, PHP_BADWORDS_COMPILER_RES_NAME, le_badwords_compiler); - + COM57_ZEND_FETCH_RESOURCE(compiler, struct bw_trie_compiler_t *, &zcompiler, -1, PHP_BADWORDS_COMPILER_RES_NAME, le_badwords_compiler); + bw_trie_compiler_compile(compiler, return_value); } /* }}} */ @@ -257,7 +381,12 @@ PHP_FUNCTION(badwords_compiler_compile) PHP_FUNCTION(badwords_create) { char *filename, *persistkey = NULL; + +#if PHP_MAJOR_VERSION < 7 int flen, klen = 0; +#else + size_t flen, klen = 0; +#endif if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &filename, &flen, &persistkey, &klen) == FAILURE) { return; @@ -266,7 +395,7 @@ PHP_FUNCTION(badwords_create) int fd = open(filename, O_RDONLY); if (fd < 0) { if (persistkey) { - zend_hash_del(&EG(persistent_list), persistkey, klen+1); + COM57_ZEND_HASH_DEL(&EG(persistent_list), persistkey, klen+1); } RETURN_FALSE; } @@ -276,9 +405,13 @@ PHP_FUNCTION(badwords_create) if (persistkey) { struct bw_trie_mmap_t *existing_mmi; - zend_rsrc_list_entry *existing_mmi_le; + COM57_ZEND_RESURCE_T *existing_mmi_le; +#if PHP_MAJOR_VERSION < 7 if (zend_hash_find(&EG(persistent_list), persistkey, klen+1, (void **)&existing_mmi_le) == SUCCESS) { +#else + if ((existing_mmi_le = (COM57_ZEND_RESURCE_T *)zend_hash_str_find(&EG(persistent_list), persistkey, klen)) != NULL) { +#endif existing_mmi = (struct bw_trie_mmap_t *) existing_mmi_le->ptr; if (existing_mmi->trie_tim == stat.st_mtime && existing_mmi->trie_ino == stat.st_ino @@ -286,10 +419,11 @@ PHP_FUNCTION(badwords_create) && existing_mmi->mlen == stat.st_size) { existing_mmi->refcount++; close(fd); - ZEND_REGISTER_RESOURCE(return_value, existing_mmi, le_badwords_trie); + + COM57_ZEND_REGISTER_RESOURCE(return_value, existing_mmi, le_badwords_trie); return; } else { - zend_hash_del(&EG(persistent_list), persistkey, klen+1); + COM57_ZEND_HASH_DEL(&EG(persistent_list), persistkey, klen+1); } } } @@ -314,13 +448,17 @@ PHP_FUNCTION(badwords_create) mmi->trie = addr; mmi->mlen = stat.st_size; - ZEND_REGISTER_RESOURCE(return_value, mmi, le_badwords_trie); + COM57_ZEND_REGISTER_RESOURCE(return_value, mmi, le_badwords_trie); if (persistkey) { - zend_rsrc_list_entry le; + COM57_ZEND_RESURCE_T le; le.type = le_badwords_trie; le.ptr = mmi; - if (zend_hash_update(&EG(persistent_list), persistkey, klen+1, (void*)&le, sizeof(le), NULL) == SUCCESS) +#if PHP_MAJOR_VERSION < 7 + if (COM57_ZEND_HASH_UPDATE(&EG(persistent_list), persistkey, klen+1, (void*)&le, sizeof(le), NULL) == SUCCESS) +#else + if (COM57_ZEND_HASH_UPDATE(&EG(persistent_list), persistkey, klen+1, (zval *)&le, sizeof(le), NULL) == SUCCESS) +#endif mmi->refcount++; } } @@ -331,24 +469,43 @@ PHP_FUNCTION(badwords_create) */ PHP_FUNCTION(badwords_match) { - zval **trie; char *text; - int text_len; +#if PHP_MAJOR_VERSION < 7 + zval **trie; + int text_len; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs", &trie, &text, &text_len) == FAILURE) { return; } - + if (Z_TYPE_PP(trie) == IS_STRING) { - bw_trie_match(*trie, return_value, text, text_len); + bw_trie_match(Z_STRVAL_PP(trie), return_value, (uint8_t *)text, text_len); } else if (Z_TYPE_PP(trie) == IS_RESOURCE) { struct bw_trie_mmap_t *mmi; - zval trie_; + ZEND_FETCH_RESOURCE(mmi, struct bw_trie_mmap_t *, trie, -1, PHP_BADWORDS_TRIE_RES_NAME, le_badwords_trie); - ZVAL_STRINGL(&trie_, mmi->trie, mmi->mlen, 0); - bw_trie_match(&trie_, return_value, text, text_len); + + bw_trie_match((char *)mmi->trie, return_value, (uint8_t *)text, text_len); + } +#else + zval *trie; + size_t text_len; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs", &trie, &text, &text_len) == FAILURE) { + return; } + + if (Z_TYPE_P(trie) == IS_STRING) { + bw_trie_match(Z_STRVAL_P(trie), return_value, (uint8_t *)text, text_len); + } + else if (Z_TYPE_P(trie) == IS_RESOURCE) { + struct bw_trie_mmap_t *mmi; + + mmi = (struct bw_trie_mmap_t *)zend_fetch_resource(Z_RES_P(trie), PHP_BADWORDS_TRIE_RES_NAME, le_badwords_trie); + + bw_trie_match((char *)mmi->trie, return_value, (uint8_t *)text, text_len); + } +#endif } /* }}} */ @@ -357,24 +514,43 @@ PHP_FUNCTION(badwords_match) */ PHP_FUNCTION(badwords_replace) { - zval **trie; char *text; - int text_len; +#if PHP_MAJOR_VERSION < 7 + zval **trie; + int text_len; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs", &trie, &text, &text_len) == FAILURE) { return; } - + if (Z_TYPE_PP(trie) == IS_STRING) { - bw_trie_replace(*trie, return_value, text, text_len); + bw_trie_replace(Z_STRVAL_PP(trie), return_value, (uint8_t *)text, text_len); } else if (Z_TYPE_PP(trie) == IS_RESOURCE) { struct bw_trie_mmap_t *mmi; - zval trie_; + ZEND_FETCH_RESOURCE(mmi, struct bw_trie_mmap_t *, trie, -1, PHP_BADWORDS_TRIE_RES_NAME, le_badwords_trie); - ZVAL_STRINGL(&trie_, mmi->trie, mmi->mlen, 0); - bw_trie_replace(&trie_, return_value, text, text_len); + + bw_trie_replace((char *)mmi->trie, return_value, (uint8_t *)text, text_len); + } +#else + zval *trie; + size_t text_len; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs", &trie, &text, &text_len) == FAILURE) { + return; } + + if (Z_TYPE_P(trie) == IS_STRING) { + bw_trie_replace(Z_STRVAL_P(trie), return_value, (uint8_t *)text, text_len); + } + else if (Z_TYPE_P(trie) == IS_RESOURCE) { + struct bw_trie_mmap_t *mmi; + + mmi = (struct bw_trie_mmap_t *)zend_fetch_resource(Z_RES_P(trie), PHP_BADWORDS_TRIE_RES_NAME, le_badwords_trie); + + bw_trie_replace((char *)mmi->trie, return_value, (uint8_t *)text, text_len); + } +#endif } /* }}} */ @@ -382,7 +558,7 @@ PHP_FUNCTION(badwords_replace) */ PHP_FUNCTION(badwords_version) { - RETURN_STRING(PHP_BADWORDS_VERSION, 1); + COM57_RETURN_STRING(PHP_BADWORDS_VERSION, 1); } /* }}} */ diff --git a/php_badwords.h b/php_badwords.h index b0852ff..a038d33 100644 --- a/php_badwords.h +++ b/php_badwords.h @@ -15,6 +15,8 @@ #ifndef PHP_BADWORDS_H #define PHP_BADWORDS_H +#include "php57_include.h" + /** * Compiler Example: * $compiler = badwords_compiler_create(BADWORDS_ENCODING_UTF8, True); @@ -38,12 +40,16 @@ extern zend_module_entry badwords_module_entry; #define phpext_badwords_ptr &badwords_module_entry + #ifdef PHP_WIN32 -#define PHP_BADWORDS_API __declspec(dllexport) +# define PHP_BADWORDS_API __declspec(dllexport) +#elif defined(__GNUC__) && __GNUC__ >= 4 +# define PHP_BADWORDS_API __attribute__ ((visibility("default"))) #else -#define PHP_BADWORDS_API +# define PHP_BADWORDS_API #endif + #ifdef ZTS #include "TSRM.h" #endif @@ -57,6 +63,15 @@ extern zend_module_entry badwords_module_entry; PHP_MINIT_FUNCTION(badwords); PHP_MSHUTDOWN_FUNCTION(badwords); PHP_MINFO_FUNCTION(badwords); +PHP_RINIT_FUNCTION(badwords); +PHP_RSHUTDOWN_FUNCTION(badwords); + +#if PHP_MAJOR_VERSION >= 7 +# define BADWORDS_G(v) ZEND_MODULE_GLOBALS_ACCESSOR(badwords, v) +# if defined(ZTS) && defined(COMPILE_DL_BADWORDS) + ZEND_TSRMLS_CACHE_EXTERN() +# endif +#endif PHP_FUNCTION(badwords_compiler_create); PHP_FUNCTION(badwords_compiler_append); @@ -66,7 +81,7 @@ PHP_FUNCTION(badwords_match); PHP_FUNCTION(badwords_replace); PHP_FUNCTION(badwords_version); -#define PHP_BADWORDS_VERSION "1.1.3" +#define PHP_BADWORDS_VERSION "1.1.4" struct bw_trie_mmap_t { int32_t refcount;