diff --git a/.gitignore b/.gitignore index 3d30f57..c5da1f0 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,7 @@ libinjection/libinjection_xss.* libinjection/libinjection_html5.* libinjection/libinjection_sqli* libinjection/libinjection_wrap* +libinjection/libinjection_error.h + +# Generated files +words.py diff --git a/Makefile b/Makefile index f3aeae4..ad93292 100644 --- a/Makefile +++ b/Makefile @@ -4,15 +4,13 @@ all: build # build: upstream libinjection/libinjection_wrap.c - rm -f libinjection.py libinjection.pyc - python setup.py --verbose build --force + python3 setup.py --verbose build_ext --inplace install: build - sudo python setup.py --verbose install + sudo python3 setup.py --verbose install test-unit: build words.py - python setup.py build_ext --inplace - PYTHON_PATH='.' nosetests -v --with-xunit test_driver.py + python3 -m pytest test_driver.py -v .PHONY: test test: test-unit @@ -24,16 +22,35 @@ speed: upstream: [ -d $@ ] || git clone --depth=1 https://github.com/libinjection/libinjection.git upstream -libinjection/libinjection.h libinjection/libinjection_sqli.h: upstream +libinjection/libinjection.h libinjection/libinjection_sqli.h libinjection/libinjection_error.h \ +libinjection/libinjection_xss.h libinjection/libinjection_html5.h: upstream cp -f upstream/src/libinjection*.h upstream/src/libinjection*.c libinjection/ + # Compatibility patches for SWIG wrapping: fix type mismatches and visibility. + # These sed invocations are pattern-matched to avoid breaking unrelated code. 
+ # + # Fix return type mismatch: h5_state_data uses injection_result_t in definition but int in declaration + sed -i 's/^static int h5_state_data(/static injection_result_t h5_state_data(/' libinjection/libinjection_html5.c + # Fix return type mismatch: libinjection_is_sqli declared as injection_result_t but defined as int + sed -i 's/^int libinjection_is_sqli(/injection_result_t libinjection_is_sqli(/' libinjection/libinjection_sqli.c + # Remove static from helper functions so SWIG can wrap and expose them to Python + # (static functions in a header cannot be called from libinjection_wrap.c) + sed -i 's/^static void libinjection_sqli_reset(/void libinjection_sqli_reset(/' libinjection/libinjection_sqli.h libinjection/libinjection_sqli.c + sed -i ':a;N;$$!ba;s/static char\nlibinjection_sqli_lookup_word/char\nlibinjection_sqli_lookup_word/g' libinjection/libinjection_sqli.h libinjection/libinjection_sqli.c + sed -i ':a;N;$$!ba;s/static int\nlibinjection_sqli_blacklist/int\nlibinjection_sqli_blacklist/g' libinjection/libinjection_sqli.h libinjection/libinjection_sqli.c + sed -i ':a;N;$$!ba;s/static int\nlibinjection_sqli_not_whitelist/int\nlibinjection_sqli_not_whitelist/g' libinjection/libinjection_sqli.h libinjection/libinjection_sqli.c words.py: Makefile json2python.py upstream - ./json2python.py < upstream/src/sqlparse_data.json > words.py + python3 json2python.py < upstream/src/sqlparse_data.json > words.py -libinjection/libinjection_wrap.c: libinjection/libinjection.i libinjection/libinjection.h libinjection/libinjection_sqli.h +libinjection/libinjection_wrap.c: libinjection/libinjection.i libinjection/libinjection.h \ +libinjection/libinjection_sqli.h libinjection/libinjection_error.h \ +libinjection/libinjection_xss.h libinjection/libinjection_html5.h swig -version - swig -py3 -python -builtin -Wall -Wextra libinjection/libinjection.i + swig -python -builtin -Wall -Wextra \ + -o libinjection/libinjection_wrap.c \ + -outdir libinjection \ + 
libinjection/libinjection.i .PHONY: copy @@ -50,5 +67,6 @@ clean: @rm -f nosetests.xml @rm -f words.py @rm -f libinjection/*~ libinjection/*.pyc - @rm -f libinjection/libinjection.h libinjection/libinjection_sqli.h libinjection/libinjection_sqli.c libinjection/libinjection_sqli_data.h + @rm -f libinjection/libinjection.h libinjection/libinjection_error.h libinjection/libinjection_sqli.h libinjection/libinjection_sqli.c libinjection/libinjection_sqli_data.h + @rm -f libinjection/libinjection_html5.h libinjection/libinjection_html5.c libinjection/libinjection_xss.h libinjection/libinjection_xss.c @rm -f libinjection/libinjection_wrap.c libinjection/libinjection.py diff --git a/README.md b/README.md index d2937aa..e22005a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,97 @@ # python3-libinjection libInjection Python3 bindings + +## Overview + +Python3 bindings for [libinjection](https://github.com/libinjection/libinjection) - a SQL/SQLI tokenizer, parser and analyzer. + +## Requirements + +- Python 3.x +- SWIG 4.x +- GCC or compatible C compiler + +## Building + +### 1. Clone the repository and get upstream libinjection + +```bash +git clone https://github.com/libinjection/python3-libinjection.git +cd python3-libinjection +make upstream +``` + +### 2. Copy upstream C source files + +```bash +make libinjection/libinjection.h libinjection/libinjection_sqli.h libinjection/libinjection_error.h +``` + +### 3. Generate the SWIG wrapper + +```bash +swig -python -builtin -Wall -Wextra \ + -o libinjection/libinjection_wrap.c \ + -outdir libinjection \ + libinjection/libinjection.i +``` + +### 4. Build the Python extension + +```bash +python3 setup.py build_ext --inplace +``` + +Or using the Makefile: + +```bash +make build +``` + +### 5. 
Generate the word lookup table (needed for tests) + +```bash +python3 json2python.py < upstream/src/sqlparse_data.json > words.py +``` + +## Usage + +### SQLi Detection + +```python +import libinjection + +# Simple API - detect SQLi in a string +result, fingerprint = libinjection.sqli("1 UNION SELECT * FROM users") +if result: + print(f"SQLi detected! Fingerprint: {fingerprint}") + +# Advanced API with state object +state = libinjection.sqli_state() +libinjection.sqli_init(state, "1 UNION SELECT * FROM users", + libinjection.FLAG_QUOTE_NONE | libinjection.FLAG_SQL_ANSI) +libinjection.sqli_callback(state, None) +if libinjection.is_sqli(state): + print(f"SQLi detected! Fingerprint: {state.fingerprint}") +``` + +### XSS Detection + +```python +import libinjection + +# Detect XSS in a string +result = libinjection.xss("<script>alert(1)</script>") +if result: + print("XSS detected!") +``` + +## Testing + +Run the test suite using pytest from the repository root: + +```bash +python3 -m pytest test_driver.py test_api.py -v +``` + +> **Note:** `upstream/tests/` must exist (run `make upstream` first) for `test_driver.py` to find test data. 
diff --git a/pytest.py b/example_sqli.py similarity index 100% rename from pytest.py rename to example_sqli.py diff --git a/json2python.py b/json2python.py index 83fedc6..381fc5e 100755 --- a/json2python.py +++ b/json2python.py @@ -16,6 +16,9 @@ def toc(obj): import libinjection def lookup(state, stype, keyword): + # keyword is passed as bytes from C; decode to str for dict lookup + if isinstance(keyword, bytes): + keyword = keyword.decode('latin-1') keyword = keyword.upper() if stype == libinjection.LOOKUP_FINGERPRINT: if keyword in fingerprints and libinjection.sqli_not_whitelist(state): diff --git a/libinjection/__init__.py b/libinjection/__init__.py index 84d587a..f16540d 100644 --- a/libinjection/__init__.py +++ b/libinjection/__init__.py @@ -1 +1 @@ -from libinjection import * +from .libinjection import * diff --git a/libinjection/libinjection.i b/libinjection/libinjection.i index 3f279da..2a9b370 100644 --- a/libinjection/libinjection.i +++ b/libinjection/libinjection.i @@ -3,7 +3,10 @@ %{ #include "libinjection.h" #include "libinjection_sqli.h" +#include "libinjection_xss.h" +#include "libinjection_error.h" #include +#include /* This is the callback function that runs a python function * @@ -13,26 +16,45 @@ static char libinjection_python_check_fingerprint(sfilter* sf, int lookuptype, c PyObject *fp; PyObject *arglist; PyObject *result; - const char* strtype; char ch; // get sfilter->pattern // convert to python string fp = SWIG_InternalNewPointerObj((void*)sf, SWIGTYPE_p_libinjection_sqli_state,0); - arglist = Py_BuildValue("(Nis#)", fp, lookuptype, word, len); + // Use y# (bytes) format instead of s# (str) to avoid UnicodeDecodeError on + // non-UTF-8 bytes (e.g. 0xA0 word separators). The Python callback will + // receive the word as a bytes object and should decode it as needed. 
+ arglist = Py_BuildValue("(Niy#)", fp, lookuptype, word, len); + if (arglist == NULL) { + // Py_BuildValue failed (e.g., encoding error); treat as not found + return '\0'; + } // call pyfunct with string arg result = PyObject_CallObject((PyObject*) sf->userdata, arglist); Py_DECREF(arglist); if (result == NULL) { - printf("GOT NULL\n"); // python call has an exception // pass it back ch = '\0'; } else { - // convert value of python call to a char - strtype = PyString_AsString(result); - ch = strtype[0]; + // convert value of python call to a char (Python 3 compatible) + if (PyUnicode_Check(result)) { + Py_ssize_t size; + const char* str = PyUnicode_AsUTF8AndSize(result, &size); + if (str != NULL && size > 0) { + ch = str[0]; + } else { + // Clear any exception set by PyUnicode_AsUTF8AndSize on failure + PyErr_Clear(); + ch = '\0'; + } + } else if (PyBytes_Check(result)) { + const char* str = PyBytes_AsString(result); + ch = (str != NULL) ? str[0] : '\0'; + } else { + ch = '\0'; + } Py_DECREF(result); } return ch; @@ -65,8 +87,61 @@ for (i = 0; i < $1_dim0; i++) { } } -// automatically append string length into arg array -%apply (char *STRING, size_t LENGTH) { (const char *s, size_t slen) }; +// automatically append string length into arg array. +// Accept both str (encoded as UTF-8) and bytes (passed through as-is). +// Using bytes is recommended when the input may contain non-ASCII octets, +// since str will be UTF-8 encoded which changes the byte values. 
+%typemap(in) (const char *s, size_t slen) (Py_buffer _view, int _must_release) { + _must_release = 0; + if (PyBytes_Check($input)) { + if (PyObject_GetBuffer($input, &_view, PyBUF_SIMPLE) != 0) SWIG_fail; + $1 = (const char *)_view.buf; + $2 = (size_t)_view.len; + _must_release = 1; + } else if (PyUnicode_Check($input)) { + Py_ssize_t _len; + $1 = PyUnicode_AsUTF8AndSize($input, &_len); + if (!$1) SWIG_fail; + $2 = (size_t)_len; + } else { + PyErr_SetString(PyExc_TypeError, "expected str or bytes"); + SWIG_fail; + } +} +%typemap(freearg) (const char *s, size_t slen) { + if (_must_release$argnum) PyBuffer_Release(&_view$argnum); +} +%typemap(in) (const char *s, size_t len) (Py_buffer _view, int _must_release) { + _must_release = 0; + if (PyBytes_Check($input)) { + if (PyObject_GetBuffer($input, &_view, PyBUF_SIMPLE) != 0) SWIG_fail; + $1 = (const char *)_view.buf; + $2 = (size_t)_view.len; + _must_release = 1; + } else if (PyUnicode_Check($input)) { + Py_ssize_t _len; + $1 = PyUnicode_AsUTF8AndSize($input, &_len); + if (!$1) SWIG_fail; + $2 = (size_t)_len; + } else { + PyErr_SetString(PyExc_TypeError, "expected str or bytes"); + SWIG_fail; + } +} +%typemap(freearg) (const char *s, size_t len) { + if (_must_release$argnum) PyBuffer_Release(&_view$argnum); +} + +// Make the fingerprint output parameter in libinjection_sqli() work as an output +// The fingerprint buffer size matches libinjection's internal LIBINJECTION_SQLI_MAX_TOKENS (5) + null byte +#define LIBINJECTION_FINGERPRINT_SIZE 8 +%typemap(in, numinputs=0) char fingerprint[] (char temp[LIBINJECTION_FINGERPRINT_SIZE]) { + memset(temp, 0, sizeof(temp)); + $1 = temp; +} +%typemap(argout) char fingerprint[] { + $result = SWIG_Python_AppendOutput($result, PyUnicode_FromString($1)); +} %typemap(in) (ptr_lookup_fn fn, void* userdata) { if ($input == Py_None) { @@ -77,5 +152,7 @@ for (i = 0; i < $1_dim0; i++) { $2 = $input; } } +%include "libinjection_error.h" %include "libinjection.h" %include 
"libinjection_sqli.h" +%include "libinjection_xss.h" diff --git a/setup.py b/setup.py index eb47024..b4010e1 100644 --- a/setup.py +++ b/setup.py @@ -5,20 +5,40 @@ nickg@client9.com BSD License -- see COPYING.txt for details """ +import os + try: from setuptools import setup, Extension except ImportError: from distutils.core import setup, Extension + +def get_libinjection_version(): + """Read the libinjection version from the upstream source file, if available.""" + version_file = os.path.join(os.path.dirname(__file__), + 'upstream', 'src', 'libinjection_sqli.c') + if os.path.exists(version_file): + with open(version_file, encoding="utf-8") as f: + for line in f: + if '#define LIBINJECTION_VERSION' in line and '__clang_analyzer__' not in line: + # Extract version string from: #define LIBINJECTION_VERSION "x.y.z" + parts = line.strip().split('"') + if len(parts) >= 2: + return parts[1] + return 'undefined' + + +LIBINJECTION_VERSION = get_libinjection_version() + MODULE = Extension( - '_libinjection', [ + 'libinjection._libinjection', [ 'libinjection/libinjection_wrap.c', 'libinjection/libinjection_sqli.c', 'libinjection/libinjection_html5.c', 'libinjection/libinjection_xss.c' ], swig_opts=['-Wextra', '-builtin'], - define_macros = [], + define_macros = [('LIBINJECTION_VERSION', '"{}"'.format(LIBINJECTION_VERSION))], include_dirs = [], libraries = [], library_dirs = [], diff --git a/test_api.py b/test_api.py new file mode 100644 index 0000000..afaa376 --- /dev/null +++ b/test_api.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +""" +API tests for libinjection Python bindings. +Covers the simple sqli() and xss() APIs as well as the stateful sqli API. 
+""" +import libinjection + + +def test_sqli_returns_tuple(): + """sqli() should return a (result, fingerprint) sequence.""" + result = libinjection.sqli("1 UNION SELECT * FROM users") + assert len(result) == 2, "sqli() must return a 2-element sequence (result, fingerprint)" + + +def test_sqli_detects_injection(): + """sqli() must detect a known SQLi payload.""" + is_sqli, fingerprint = libinjection.sqli("1 UNION SELECT * FROM users") + assert is_sqli == 1, "Expected SQLi to be detected" + assert fingerprint != "", "Expected non-empty fingerprint for SQLi input" + + +def test_sqli_benign_input(): + """sqli() must not flag benign input.""" + is_sqli, fingerprint = libinjection.sqli("hello world") + assert is_sqli == 0, "Benign input should not be flagged as SQLi" + assert fingerprint == "", "Benign input should produce an empty fingerprint" + + +def test_sqli_fingerprint_content(): + """sqli() fingerprint should be a non-empty string for detected SQLi.""" + is_sqli, fingerprint = libinjection.sqli("1 UNION ALL SELECT * FROM foo") + assert is_sqli == 1 + assert isinstance(fingerprint, str) + assert len(fingerprint) > 0 + + +def test_is_sqli_stateful_api(): + """Advanced stateful API using sqli_state / sqli_init / sqli_callback / is_sqli.""" + state = libinjection.sqli_state() + libinjection.sqli_init( + state, + "1 UNION SELECT * FROM users", + libinjection.FLAG_QUOTE_NONE | libinjection.FLAG_SQL_ANSI, + ) + libinjection.sqli_callback(state, None) + assert libinjection.is_sqli(state) == 1, "Expected SQLi detection via stateful API" + assert state.fingerprint != "", "Expected fingerprint set in state" + + +def test_is_sqli_stateful_benign(): + """Stateful API should not flag benign input.""" + state = libinjection.sqli_state() + libinjection.sqli_init( + state, + "hello world", + libinjection.FLAG_QUOTE_NONE | libinjection.FLAG_SQL_ANSI, + ) + libinjection.sqli_callback(state, None) + assert libinjection.is_sqli(state) == 0, "Benign input should not be SQLi" + + +def 
test_xss_detects_script_tag(): + """xss() must detect a basic XSS payload.""" + result = libinjection.xss("") + assert result == 1, "Expected XSS detection for