Python bindings for bzip3 with parallel (multi-threaded) support.
from bz3 import compress_file , decompress_file , test_file , compress , decompress
import bz3
# Stream-compress a file; the third argument is the block size.
with open("test_inp.txt", "rb") as inp, open("compressed.bz3", "wb") as out:
    compress_file(inp, out, 1000 * 1000)

# Check the archive; the second argument is ``should_raise``.
with open("compressed.bz3", "rb") as inp:
    test_file(inp, True)

# Stream-decompress the archive back into a plain file.
with open("compressed.bz3", "rb") as inp, open("output.txt", "wb") as out:
    decompress_file(inp, out)

# One-shot, in-memory round trip on a bytes object.
print(decompress(compress(b"12121")))

# File-like interface, here used in text mode with an explicit encoding.
with bz3.open("test.bz3", "wt", encoding="utf-8", num_threads=4) as f:
    f.write("test data")

with bz3.open("test.bz3", "rt", encoding="utf-8", num_threads=4) as f:
    print(f.read())
Use the BZ3_USE_CFFI environment variable to select the backend.
num_threads is only available on the Cython backend, and only when it was built with OpenMP support.
from typing import IO , Optional , Union
def compress_file(
    input: IO,
    output: IO,
    block_size: int,
) -> None:
    """Stream-compress ``input`` into ``output``.

    The usage example passes file objects opened in binary mode
    (``"rb"`` for ``input``, ``"wb"`` for ``output``) and a ``block_size``
    of ``1000 * 1000``.

    NOTE(review): ``block_size`` is presumably expressed in bytes - confirm.
    """
    ...
def decompress_file(
    input: IO,
    output: IO,
) -> None:
    """Stream-decompress ``input`` into ``output``.

    The usage example passes file objects opened in binary mode
    (``"rb"`` for the archive, ``"wb"`` for the restored file).
    """
    ...
def recover_file(
    input: IO,
    output: IO,
) -> None:
    """Recover data from ``input`` and write it to ``output``.

    Takes the same arguments as ``decompress_file``.

    NOTE(review): presumably salvages the intact blocks of a damaged
    archive - confirm against the implementation.
    """
    ...
def test_file(
    input: IO,
    should_raise: bool = ...,
) -> bool:
    """Check the compressed stream ``input`` and report the outcome as a bool.

    The usage example passes a file object opened in ``"rb"`` mode and
    ``should_raise=True``.

    NOTE(review): presumably a failed check raises when ``should_raise`` is
    true and returns ``False`` otherwise - confirm.
    """
    ...
class BZ3File:
    """File object for a bzip3 archive (interface stub).

    Only signatures are declared here; every body is ``...``.

    NOTE(review): the method names follow the standard binary file-object
    API (compare ``bz2.BZ2File``); confirm exact semantics and return types
    against the implementation.
    """

    def __init__(
        self,
        filename,
        mode: str = ...,
        block_size: int = ...,
        num_threads: int = ...,
        ignore_error: bool = False,
    ) -> None:
        # num_threads is only honoured by the Cython backend built with
        # OpenMP support.
        ...

    # --- state and capability queries ---
    def close(self) -> None: ...

    @property
    def closed(self): ...

    def fileno(self): ...

    def seekable(self): ...

    def readable(self): ...

    def writable(self): ...

    # --- reading ---
    def peek(self, n: int = ...): ...

    def read(self, size: int = ...): ...

    def read1(self, size: int = ...): ...

    def readinto(self, b): ...

    def readline(self, size: int = ...): ...

    def readlines(self, size: int = ...): ...

    # --- writing ---
    def write(self, data): ...

    def writelines(self, seq): ...

    # --- positioning ---
    def seek(self, offset, whence=...): ...

    def tell(self): ...
def open(
    filename,
    mode: str = ...,
    block_size: int = ...,
    encoding: str = ...,
    errors: str = ...,
    newline: str = ...,
    num_threads: int = 1,
    ignore_error: bool = False,
) -> BZ3File:
    """Open a bzip3 file and return a file object for it.

    Takes the ``BZ3File`` constructor options plus the text-layer options
    ``encoding``, ``errors`` and ``newline``. The usage example opens
    archives in text mode (``"wt"`` / ``"rt"``) with ``encoding="utf-8"``.

    ``num_threads`` is only honoured by the Cython backend built with
    OpenMP support.

    NOTE(review): in text mode the returned object is presumably a text
    wrapper around a ``BZ3File`` rather than a bare ``BZ3File`` - confirm.
    """
    ...
def compress(
    data: bytes,
    block_size: int = ...,
    num_threads: int = 1,
) -> bytes:
    """Compress ``data`` in memory and return the result as bytes.

    The usage example round-trips a value with
    ``decompress(compress(b"12121"))``.

    ``num_threads`` is only honoured by the Cython backend built with
    OpenMP support.
    """
    ...
def decompress(
    data: bytes,
    num_threads: int = 1,
) -> bytes:
    """Decompress ``data`` in memory and return the result as bytes.

    Counterpart of ``compress``; the usage example round-trips a value with
    ``decompress(compress(b"12121"))``.

    ``num_threads`` is only honoured by the Cython backend built with
    OpenMP support.
    """
    ...
def min_memory_needed(
    block_size: int,
) -> int:
    """Return the minimum memory needed for the given ``block_size``.

    NOTE(review): presumably a byte count covering the working memory bzip3
    requires for that block size - confirm units and scope.
    """
    ...
def orig_size_sufficient_for_decode(
    block: bytes,
    orig_size: int,
) -> int:
    """Report whether ``orig_size`` is sufficient for decoding ``block``.

    NOTE(review): the result is declared as an int, not a bool; the meaning
    of each value is not visible here - confirm against the implementation.
    """
    ...
def libversion() -> str:
    """Get the bzip3 version as a string."""
    ...
def bound(
    inp: int,
) -> int:
    """Return the recommended output-buffer size for the compression functions.

    ``inp`` is an integer; ``compress_into`` requires its output buffer to
    hold at least ``bound(len(inp))`` bytes, so it is the input length.
    """
    ...
# High-level API
def compress_into(
    inp: Union[bytes, bytearray],
    out: bytearray,
) -> int:
    """Compress one block of data from ``inp`` into ``out`` without copying.

    Both parameters accept objects that implement the buffer protocol.
    ``out`` must be writable and at least ``bound(len(inp))`` bytes long.

    NOTE(review): the int result is presumably the number of bytes written
    to ``out`` - confirm.
    """
    ...
def decompress_into(
    inp: Union[bytes, bytearray],
    out: bytearray,
) -> int:
    """Decompress one block of data from ``inp`` into ``out`` without copying.

    NOTE(review): the int result is presumably the number of bytes written
    to ``out``, and ``out`` must be large enough for the decompressed
    block - confirm both.
    """
    ...
Note: the high-level API does not interoperate with the low-level API; see this