Skip to content

Commit

Permalink
Merge pull request #2512 from kamil-certat/http_collector_dirs
Browse files Browse the repository at this point in the history
FIX: Support for extracting data from archives with dirs
  • Loading branch information
sebix authored Aug 27, 2024
2 parents 6aa1147 + 489a1be commit f62fde4
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

### Core
- `intelmq.lib.utils.drop_privileges`: When IntelMQ is called as `root` and dropping the privileges to user `intelmq`, also set the non-primary groups associated with the `intelmq` user. Makes the behaviour of running intelmqctl as `root` closer to the behaviour of `sudo -u intelmq ...` (PR#2507 by Mikk Margus Möll).
- `intelmq.lib.utils.unzip`: Ignore directories themselves when extracting data to prevent the extraction of empty data for directory entries (PR#2512 by Kamil Mankowski).

### Development

Expand Down
4 changes: 2 additions & 2 deletions intelmq/lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ def extract_tar(file):
def extract(filename):
return tar.extractfile(filename).read()

return tuple(file.name for file in tar.getmembers()), tar, extract
return tuple(file.name for file in tar.getmembers() if file.isfile()), tar, extract


def extract_gzip(file):
Expand All @@ -547,7 +547,7 @@ def extract_gzip(file):

def extract_zip(file):
    """
    Open an in-memory zip archive and list the regular files it contains.

    Parameters:
        file: raw bytes of the zip archive

    Returns:
        A 3-tuple of: the list of member names with directory entries left out,
        the opened zipfile.ZipFile object, and its bound ``read`` method, which
        returns the content of a member given its name.
    """
    zfp = zipfile.ZipFile(io.BytesIO(file), "r")
    # Directory entries carry no data of their own; listing them would make
    # callers extract empty content for every directory in the archive.
    file_names = [member.filename for member in zfp.infolist() if not member.is_dir()]
    return file_names, zfp, zfp.read


def unzip(file: bytes, extract_files: Union[bool, list], logger=None,
Expand Down
Binary file added intelmq/tests/assets/subdir.tar.gz
Binary file not shown.
3 changes: 3 additions & 0 deletions intelmq/tests/assets/subdir.tar.gz.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 CERT.at GmbH

SPDX-License-Identifier: AGPL-3.0-or-later
Binary file added intelmq/tests/assets/subdir.zip
Binary file not shown.
3 changes: 3 additions & 0 deletions intelmq/tests/assets/subdir.zip.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 CERT.at GmbH

SPDX-License-Identifier: AGPL-3.0-or-later
19 changes: 19 additions & 0 deletions intelmq/tests/bots/collectors/http/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,25 @@ def test_zip(self, mocker):
self.assertMessageEqual(0, output0)
self.assertMessageEqual(1, output1)

def test_zip_subdirs(self, mocker):
    """
    Test unzipping when the zip has subdirectories.

    Runs the bot once against subdir.zip and compares the first two
    messages against the expected reports for 'subdir/bar' and 'subdir/foo'.
    """
    url = 'http://localhost/subdir.zip'
    prepare_mocker(mocker)
    self.run_bot(parameters={'http_url': url, 'name': 'Example feed'},
                 iterations=1)

    # Message position -> archive member the report is expected to come from
    expected_reports = []
    for position, member_name in enumerate(('subdir/bar', 'subdir/foo')):
        report = OUTPUT[position].copy()
        report['feed.url'] = url
        report['extra.file_name'] = member_name
        expected_reports.append(report)

    for position, report in enumerate(expected_reports):
        self.assertMessageEqual(position, report)

@test.skip_exotic()
def test_pgp(self, mocker):
"""
Expand Down
16 changes: 16 additions & 0 deletions intelmq/tests/lib/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,14 @@ def test_unzip_tar_gz_return_names(self):
self.assertEqual(tuple(result), (('bar', b'bar text\n'),
('foo', b'foo text\n')))

def test_unzip_tar_gz_with_subdir(self):
    """
    Test the unzip function with a tar gz file containing a subdirectory,
    using return_names. The expected result contains only the two files,
    i.e. the directory itself is ignored.
    """
    assets_dir = os.path.dirname(__file__)
    archive_path = os.path.join(assets_dir, '../assets/subdir.tar.gz')
    with open(archive_path, 'rb') as archive:
        extracted = utils.unzip(archive.read(), extract_files=True, return_names=True)
    expected = (
        ('subdir/foo', b'foo text\n'),
        ('subdir/bar', b'bar text\n'),
    )
    self.assertEqual(tuple(extracted), expected)

def test_unzip_gz(self):
""" Test the unzip function with a gz file. """
filename = os.path.join(os.path.dirname(__file__), '../assets/foobar.gz')
Expand Down Expand Up @@ -289,6 +297,14 @@ def test_unzip_zip_return_names(self):
self.assertEqual(tuple(result), (('bar', b'bar text\n'),
('foo', b'foo text\n')))

def test_unzip_zip_with_subdir(self):
    """
    Test the unzip function with a zip containing a subdirectory, using
    return_names. The expected result contains only the two files, i.e.
    the directory itself is ignored.
    """
    assets_dir = os.path.dirname(__file__)
    archive_path = os.path.join(assets_dir, '../assets/subdir.zip')
    with open(archive_path, 'rb') as archive:
        extracted = utils.unzip(archive.read(), extract_files=True, return_names=True)
    expected = (
        ('subdir/bar', b'bar text\n'),
        ('subdir/foo', b'foo text\n'),
    )
    self.assertEqual(tuple(extracted), expected)

def test_file_name_from_response(self):
""" test file_name_from_response """
response = requests.Response()
Expand Down

0 comments on commit f62fde4

Please sign in to comment.