From eb695ecb33f111a9543a93a4453f2f0e6e9173bb Mon Sep 17 00:00:00 2001 From: Volodymyr Polishchuk Date: Thu, 17 Dec 2020 14:32:26 +0200 Subject: [PATCH 1/6] Fix paths issues (double slashes, end slashes) --- src/org/apache/hadoop/WebHDFS.php | 46 +++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/src/org/apache/hadoop/WebHDFS.php b/src/org/apache/hadoop/WebHDFS.php index fe664ef..293ceb4 100644 --- a/src/org/apache/hadoop/WebHDFS.php +++ b/src/org/apache/hadoop/WebHDFS.php @@ -211,17 +211,23 @@ public function listFiles($path, $recursive = false, $includeFileMetaData = fals switch ($fileEntity->type) { case 'DIRECTORY': if ($recursive === true) { - $result = array_merge($result, - $this->listFiles($path.$fileEntity->pathSuffix.'/', true, $includeFileMetaData, - $maxAmountOfFiles - sizeof($result))); + $result = array_merge( + $result, + $this->listFiles( + $this->concatPath([$path, $fileEntity->pathSuffix]), + true, + $includeFileMetaData, + $maxAmountOfFiles - sizeof($result) + ) + ); } break; default: if ($includeFileMetaData === true) { - $fileEntity->path = $path.$fileEntity->pathSuffix; + $fileEntity->path = $this->concatPath([$path, $fileEntity->pathSuffix]); $result[] = $fileEntity; } else { - $result[] = $path.$fileEntity->pathSuffix; + $result[] = $this->concatPath([$path, $fileEntity->pathSuffix]); } } // recursion will be interrupted since we subtract the amount of the current result set from the maxAmountOfFiles amount with calling the next recursion @@ -247,15 +253,19 @@ public function listDirectories($path, $recursive = false, $includeFileMetaData switch ($fileEntity->type) { case 'DIRECTORY': if ($includeFileMetaData === true) { - $fileEntity->path = $path.$fileEntity->pathSuffix; + $fileEntity->path = $this->concatPath([$path, $fileEntity->pathSuffix]); $result[] = $fileEntity; } else { - $result[] = $path.$fileEntity->pathSuffix; + $result[] = $this->concatPath([$path, $fileEntity->pathSuffix]); } if 
($recursive === true) { $result = array_merge($result, - $this->listDirectories($path.$fileEntity->pathSuffix.'/', $recursive, - $includeFileMetaData)); + $this->listDirectories( + $this->concatPath([$path, $fileEntity->pathSuffix]), + $recursive, + $includeFileMetaData + ) + ); } break; } @@ -445,4 +455,22 @@ private function getResponseErrorException($responseData) return new WebHDFS_Exception($exceptionMessage, $exceptionCode); } + + private function concatPath(array $paths) { + $result = ''; + foreach ($paths as $path) { + if (!$result || preg_match('/.+\/$/', $result)) { + $result .= $path; + continue; + } + + $result .= '/' . $path; + } + return $this->removeMultiSlashFromPath($result); + } + + private function removeMultiSlashFromPath($path) { + return preg_replace('/(\/)\/+/', '$1', $path); + } + } From deda59816628708b12003feab0cdc19fb7cd640d Mon Sep 17 00:00:00 2001 From: Volodymyr Polishchuk Date: Thu, 17 Dec 2020 14:33:02 +0200 Subject: [PATCH 2/6] Fix recursive listing of files --- src/org/apache/hadoop/WebHDFS.php | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/org/apache/hadoop/WebHDFS.php b/src/org/apache/hadoop/WebHDFS.php index 293ceb4..f404ece 100644 --- a/src/org/apache/hadoop/WebHDFS.php +++ b/src/org/apache/hadoop/WebHDFS.php @@ -200,7 +200,15 @@ private function _listStatus($path, $cleanLastRequest = false) } } - public function listFiles($path, $recursive = false, $includeFileMetaData = false, $maxAmountOfFiles = false) + /** + * @param string $path + * @param bool $recursive + * @param bool $includeFileMetaData + * @param int $maxAmountOfFiles + * @return array + * @throws WebHDFS_Exception + */ + public function listFiles($path, $recursive = false, $includeFileMetaData = false, $maxAmountOfFiles = 0) { $result = array(); $listStatusResult = $this->_listStatus($path); @@ -231,7 +239,7 @@ public function listFiles($path, $recursive = false, $includeFileMetaData = fals } } // recursion will be interrupted 
since we subtract the amount of the current result set from the maxAmountOfFiles amount with calling the next recursion - if (sizeof($result) >= $maxAmountOfFiles) { + if ($maxAmountOfFiles !== 0 && sizeof($result) >= $maxAmountOfFiles) { break; } } From fef341a5612e934ef0ffee8ff59b685af6c9a882 Mon Sep 17 00:00:00 2001 From: Volodymyr Polishchuk Date: Thu, 17 Dec 2020 14:34:20 +0200 Subject: [PATCH 3/6] Fix overwrite parameter conversion --- src/org/apache/hadoop/WebHDFS.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/org/apache/hadoop/WebHDFS.php b/src/org/apache/hadoop/WebHDFS.php index f404ece..8d50067 100644 --- a/src/org/apache/hadoop/WebHDFS.php +++ b/src/org/apache/hadoop/WebHDFS.php @@ -52,7 +52,7 @@ public function create( $options = array( 'op' => 'CREATE', - 'overwrite' => $overwrite, + 'overwrite' => $overwrite ? 'true' : 'false', 'blocksize' => $blocksize, 'replication' => $replication, 'permission' => $permission, @@ -79,7 +79,7 @@ public function createWithData( ) { $options = array( 'op' => 'CREATE', - 'overwrite' => $overwrite, + 'overwrite' => $overwrite ? 'true' : 'false', 'blocksize' => $blockSize, 'replication' => $replication, 'permission' => $permission, From 35837f2cdb55fc26ee5ad2a221fec3f9ee2a9cc1 Mon Sep 17 00:00:00 2001 From: Volodymyr Polishchuk Date: Mon, 21 Dec 2020 11:44:21 +0200 Subject: [PATCH 4/6] Replace maxAmountOfFiles with maxDepth, as it gives a more predictable result When we limit the listing by the number of files, we never get the right result. For example, suppose we have the following file system structure: folder1/ - subfolder1/ --- file1 --- file2 - file3 - file4 folder2/ - subfolder2/ --- file5 --- file6 - file5 and maxAmountOfFiles = 3; then we will read [folder1, subfolder1, file3, file3, folder2, subfolder2, file5]. This is not the expected result. It is better to limit the listing by depth, which gives the expected result. 
--- composer.json | 3 ++- src/org/apache/hadoop/WebHDFS.php | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/composer.json b/composer.json index 85baaaf..582739f 100644 --- a/composer.json +++ b/composer.json @@ -28,7 +28,8 @@ ], "require": { "php": ">=5.4.0", - "ext-curl": "*" + "ext-curl": "*", + "ext-json": "*" }, "autoload": { "psr-0": { diff --git a/src/org/apache/hadoop/WebHDFS.php b/src/org/apache/hadoop/WebHDFS.php index 8d50067..dee95b0 100644 --- a/src/org/apache/hadoop/WebHDFS.php +++ b/src/org/apache/hadoop/WebHDFS.php @@ -204,11 +204,11 @@ private function _listStatus($path, $cleanLastRequest = false) * @param string $path * @param bool $recursive * @param bool $includeFileMetaData - * @param int $maxAmountOfFiles + * @param int $maxDepth max depth to search recursively. When value below zero, it will search to the end of tree * @return array * @throws WebHDFS_Exception */ - public function listFiles($path, $recursive = false, $includeFileMetaData = false, $maxAmountOfFiles = 0) + public function listFiles($path, $recursive = false, $includeFileMetaData = false, $maxDepth = -1) { $result = array(); $listStatusResult = $this->_listStatus($path); @@ -225,7 +225,7 @@ public function listFiles($path, $recursive = false, $includeFileMetaData = fals $this->concatPath([$path, $fileEntity->pathSuffix]), true, $includeFileMetaData, - $maxAmountOfFiles - sizeof($result) + $maxDepth - 1 ) ); } @@ -239,7 +239,7 @@ public function listFiles($path, $recursive = false, $includeFileMetaData = fals } } // recursion will be interrupted since we subtract the amount of the current result set from the maxAmountOfFiles amount with calling the next recursion - if ($maxAmountOfFiles !== 0 && sizeof($result) >= $maxAmountOfFiles) { + if ($maxDepth === 0) { break; } } From 86cfad804b1fe826248569360cd486c47df48f90 Mon Sep 17 00:00:00 2001 From: Volodymyr Polishchuk Date: Mon, 21 Dec 2020 12:58:32 +0200 Subject: [PATCH 5/6] Update package name and 
version --- README.md | 4 ++-- composer.json | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0695080..36a415d 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ php-WebHDFS is a PHP client for [WebHDFS](http://hadoop.apache.org/docs/r2.0.3-a ## Install via composer ```bash -composer require simpleenergy/php-webhdfs +composer require dreamfactory/php-webhdfs ``` ## Usage @@ -149,4 +149,4 @@ $hdfs->setReplication('user/hadoop-username/file.txt', '2'); ```php $hdfs = new WebHDFS('mynamenode.hadoop.com', '50070', 'hadoop-username'); $response = $hdfs->setTimes('user/hadoop-username/file.txt'); -``` \ No newline at end of file +``` diff --git a/composer.json b/composer.json index 582739f..9255ef4 100644 --- a/composer.json +++ b/composer.json @@ -1,9 +1,15 @@ { - "name": "simpleenergy/php-webhdfs", + "name": "dreamfactory/php-webhdfs", "description": "PHP WebHDFS, forked from https://github.com/simpleenergy/php-WebHDFS", "minimum-stability": "stable", + "repositories": [ + { + "type": "vcs", + "url": "https://github.com/dreamfactorysoftware/php-WebHDFS" + } + ], "license": "MIT", - "version": "1.0.7", + "version": "1.1.0", "authors": [ { "name": "tranch-xiao", From f5b9b71d6db1cbc1957b96a06b58b21f333e152f Mon Sep 17 00:00:00 2001 From: Danil Lytvyn Date: Mon, 16 Jan 2023 14:50:29 +0200 Subject: [PATCH 6/6] [BUGFIX] DP-460 - Get rid of invalid 'CURLINFO_EFFECTIVE_URL' cURL option - Get rid of improper 'Content-Length' HTTP header - Remove composer's 'version' property, as it should be omitted according to the documentation https://getcomposer.org/doc/04-schema.md#version --- composer.json | 1 - src/org/apache/hadoop/tools/Curl.php | 3 --- 2 files changed, 4 deletions(-) diff --git a/composer.json b/composer.json index 9255ef4..3bfdc02 100644 --- a/composer.json +++ b/composer.json @@ -9,7 +9,6 @@ } ], "license": "MIT", - "version": "1.1.0", "authors": [ { "name": "tranch-xiao", diff --git 
a/src/org/apache/hadoop/tools/Curl.php b/src/org/apache/hadoop/tools/Curl.php index 327d5e9..190a270 100644 --- a/src/org/apache/hadoop/tools/Curl.php +++ b/src/org/apache/hadoop/tools/Curl.php @@ -63,7 +63,6 @@ private function _findRedirectUrl($url, $options) { $options[CURLOPT_URL] = $url; $options[CURLOPT_HEADER] = true; - $options[CURLINFO_EFFECTIVE_URL] = true; $options[CURLOPT_RETURNTRANSFER] = true; $header = $this->_exec($options); $matches = array(); @@ -221,8 +220,6 @@ private function _exec($options, $returnInfo = false) ['Content-Length: '.strlen($options[CURLOPT_POSTFIELDS])] ); } - } else { - $options[CURLOPT_HTTPHEADER] = array_merge($options[CURLOPT_HTTPHEADER], ['Content-Length: 0']); } }