New Upstream Release - yara-python

Ready changes

Summary

Merged new upstream version: 4.3.1 (was: 4.2.3).

Diff

diff --git a/README.rst b/README.rst
index 023ff8b..6b7d792 100644
--- a/README.rst
+++ b/README.rst
@@ -1,5 +1,3 @@
-.. image:: https://travis-ci.org/VirusTotal/yara-python.svg
-    :target: https://travis-ci.org/VirusTotal/yara-python
 .. image:: https://ci.appveyor.com/api/projects/status/gidnb9ulj3rje5s2?svg=true
     :target: https://ci.appveyor.com/project/plusvic/yara-python
 
@@ -24,7 +22,15 @@ Here it goes a little example:
     >>> print(matches[0].tags)
     ['bar']
     >>> print(matches[0].strings)
-    [(10L, '$a', 'lmn')]
+    [$a]
+    >>> print(matches[0].strings[0].identifier)
+    $a
+    >>> print(matches[0].strings[0].instances)
+    [lmn]
+    >>> print(matches[0].strings[0].instances[0].offset)
+    10
+    >>> print(matches[0].strings[0].instances[0].matched_length)
+    3
 
 
 Installation
diff --git a/appveyor.yml b/appveyor.yml
index 2f63a38..e2fe0ff 100755
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -11,51 +11,26 @@ environment:
     # Pre-installed Python versions, which Appveyor may upgrade to
     # a later point release.
     # See: http://www.appveyor.com/docs/installed-software#python
-
-    - PYTHON: "C:\\Python35"
-      PYTHON_VERSION: "3.5.x" # currently 3.5.4
-      PYTHON_ARCH: "32"
-      OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/fakubeldw67e9pmg/artifacts/YARA.OpenSSL.x86.1.1.1.nupkg"
-      VS: "Visual Studio 14 2015"
-
-    - PYTHON: "C:\\Python35-x64"
-      PYTHON_VERSION: "3.5.x" # currently 3.5.4
-      PYTHON_ARCH: "64"
-      OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/q63539qt9yqaqspo/artifacts/YARA.OpenSSL.x64.1.1.1.nupkg"
-      VS: "Visual Studio 14 2015 Win64"
-
-    - PYTHON: "C:\\Python36"
-      PYTHON_VERSION: "3.6.x" # currently 3.6.8
-      PYTHON_ARCH: "32"
-      OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/fakubeldw67e9pmg/artifacts/YARA.OpenSSL.x86.1.1.1.nupkg"
-      VS: "Visual Studio 14 2015"
-
-    - PYTHON: "C:\\Python36-x64"
-      PYTHON_VERSION: "3.6.x" # currently 3.6.8
-      PYTHON_ARCH: "64"
-      OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/q63539qt9yqaqspo/artifacts/YARA.OpenSSL.x64.1.1.1.nupkg"
-      VS: "Visual Studio 14 2015 Win64"
-
     - PYTHON: "C:\\Python37"
-      PYTHON_VERSION: "3.7.x" # currently 3.7.0
+      PYTHON_VERSION: "3.7.x"
       PYTHON_ARCH: "32"
       OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/fakubeldw67e9pmg/artifacts/YARA.OpenSSL.x86.1.1.1.nupkg"
       VS: "Visual Studio 14 2015"
 
     - PYTHON: "C:\\Python37-x64"
-      PYTHON_VERSION: "3.7.x" # currently 3.7.0
+      PYTHON_VERSION: "3.7.x"
       PYTHON_ARCH: "64"
       OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/q63539qt9yqaqspo/artifacts/YARA.OpenSSL.x64.1.1.1.nupkg"
       VS: "Visual Studio 14 2015 Win64"
 
     - PYTHON: "C:\\Python38"
-      PYTHON_VERSION: "3.8.x" # currently 3.8.0
+      PYTHON_VERSION: "3.8.x"
       PYTHON_ARCH: "32"
       OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/fakubeldw67e9pmg/artifacts/YARA.OpenSSL.x86.1.1.1.nupkg"
       VS: "Visual Studio 14 2015"
 
     - PYTHON: "C:\\Python38-x64"
-      PYTHON_VERSION: "3.8.x" # currently 3.8.0
+      PYTHON_VERSION: "3.8.x"
       PYTHON_ARCH: "64"
       OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/q63539qt9yqaqspo/artifacts/YARA.OpenSSL.x64.1.1.1.nupkg"
       VS: "Visual Studio 14 2015 Win64"
@@ -84,6 +59,18 @@ environment:
       OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/q63539qt9yqaqspo/artifacts/YARA.OpenSSL.x64.1.1.1.nupkg"
       VS: "Visual Studio 14 2015 Win64"
 
+    - PYTHON: "C:\\Python311"
+      PYTHON_VERSION: "3.11.2"
+      PYTHON_ARCH: "32"
+      OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/fakubeldw67e9pmg/artifacts/YARA.OpenSSL.x86.1.1.1.nupkg"
+      VS: "Visual Studio 14 2015"
+
+    - PYTHON: "C:\\Python311-x64"
+      PYTHON_VERSION: "3.11.2"
+      PYTHON_ARCH: "64"
+      OPENSSL_LIB: "https://ci.appveyor.com/api/buildjobs/q63539qt9yqaqspo/artifacts/YARA.OpenSSL.x64.1.1.1.nupkg"
+      VS: "Visual Studio 14 2015 Win64"
+
 install:
   # If there is a newer build queued for the same PR, cancel this one.
   # The AppVeyor 'rollout builds' option is supposed to serve the same
@@ -152,10 +139,10 @@ clone_script:
 
 build_script:
   # Build the compiled extension
-  - "%CMD_IN_ENV% python setup.py build_ext --enable-cuckoo --enable-dotnet
+  - "%CMD_IN_ENV% python setup.py build_ext --enable-cuckoo
     -L../jansson-%JANSSON_VERSION%/build/lib/Release;../openssl/lib
     -I../jansson-%JANSSON_VERSION%/build/include;../openssl/include
-    -DHASH_MODULE,HAVE_LIBCRYPTO
+    -DHASH_MODULE,HAVE_LIBCRYPTO,BUCKETS_128,CHECKSUM_1B
     -llibcrypto"
 
 after_build:
@@ -167,7 +154,6 @@ test_script:
 after_test:
   # If tests are successful, create binary packages for the project.
   - "%CMD_IN_ENV% python setup.py bdist_wheel"
-  - "%CMD_IN_ENV% python setup.py bdist --format=msi"
 
 artifacts:
   # Archive the generated packages in the ci.appveyor.com build report.
diff --git a/debian/changelog b/debian/changelog
index b8afab1..7d17dce 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+yara-python (4.3.1-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+  * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Sun, 11 Jun 2023 14:25:44 -0000
+
 yara-python (4.2.0-1) unstable; urgency=medium
 
   * New upstream version 4.2.0
diff --git a/setup.cfg b/setup.cfg
index ae0d046..8616118 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-description_file = README.md
+description_file = README.rst
 license_file = LICENSE
 
 [test]
diff --git a/setup.py b/setup.py
index d1f0a46..0248577 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2007-2013. The YARA Authors. All Rights Reserved.
+# Copyright (c) 2007-2022. The YARA Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,7 +34,6 @@ OPTIONS = [
    ('dynamic-linking', None, 'link dynamically against libyara'),
    ('enable-cuckoo', None, 'enable "cuckoo" module'),
    ('enable-magic', None, 'enable "magic" module'),
-   ('enable-dotnet', None, 'enable "dotnet" module'),
    ('enable-dex', None, 'enable "dex" module'),
    ('enable-macho', None, 'enable "macho" module'),
    ('enable-profiling', None, 'enable profiling features')]
@@ -44,7 +43,6 @@ BOOLEAN_OPTIONS = [
     'dynamic-linking',
     'enable-cuckoo',
     'enable-magic',
-    'enable-dotnet',
     'enable-dex',
     'enable-macho',
     'enable-profiling']
@@ -76,12 +74,13 @@ def muted(*streams):
     devnull.close()
 
 
-def has_function(function_name, include_dirs=None, libraries=None, library_dirs=None):
+def has_function(function_name, includes=None, include_dirs=None, libraries=None, library_dirs=None):
   """Checks if a given functions exists in the current platform."""
   compiler = distutils.ccompiler.new_compiler()
   with muted(sys.stdout, sys.stderr):
       result = compiler.has_function(
           function_name,
+          includes=includes,
           include_dirs=include_dirs,
           libraries=libraries,
           library_dirs=library_dirs)
@@ -118,7 +117,6 @@ class BuildCommand(build):
     self.dynamic_linking = None
     self.enable_magic = None
     self.enable_cuckoo = None
-    self.enable_dotnet = None
     self.enable_dex = None
     self.enable_macho = None
     self.enable_profiling = None
@@ -140,7 +138,6 @@ class BuildExtCommand(build_ext):
     self.dynamic_linking = None
     self.enable_magic = None
     self.enable_cuckoo = None
-    self.enable_dotnet = None
     self.enable_dex = None
     self.enable_macho = None
     self.enable_profiling = None
@@ -156,7 +153,6 @@ class BuildExtCommand(build_ext):
         ('dynamic_linking', 'dynamic_linking'),
         ('enable_magic', 'enable_magic'),
         ('enable_cuckoo', 'enable_cuckoo'),
-        ('enable_dotnet', 'enable_dotnet'),
         ('enable_dex', 'enable_dex'),
         ('enable_macho', 'enable_macho'),
         ('enable_profiling', 'enable_profiling'))
@@ -167,9 +163,6 @@ class BuildExtCommand(build_ext):
     if self.enable_cuckoo and self.dynamic_linking:
       raise distutils.errors.DistutilsOptionError(
           '--enable-cuckoo can''t be used with --dynamic-linking')
-    if self.enable_dotnet and self.dynamic_linking:
-      raise distutils.errors.DistutilsOptionError(
-          '--enable-dotnet can''t be used with --dynamic-linking')
     if self.enable_dex and self.dynamic_linking:
       raise distutils.errors.DistutilsOptionError(
           '--enable-dex can''t be used with --dynamic-linking')
@@ -200,6 +193,22 @@ class BuildExtCommand(build_ext):
     building_for_freebsd = 'freebsd' in self.plat_name
     building_for_openbsd = 'openbsd' in self.plat_name # need testing
 
+    if building_for_windows:
+      arch = 'x86' if self.plat_name == 'win32' else 'x64'
+      openssl_include_dirs = [
+        os.path.join(base_dir, 'yara\\windows\\vs2015\\packages\\YARA.OpenSSL.{}.1.1.1\\include'.format(arch)),
+        os.path.join(base_dir, 'yara\\windows\\vs2017\\packages\\YARA.OpenSSL.{}.1.1.1\\include'.format(arch))
+      ]
+      openssl_library_dirs = [
+        os.path.join(base_dir, 'yara\\windows\\vs2015\\packages\\YARA.OpenSSL.{}.1.1.1\\lib'.format(arch)),
+        os.path.join(base_dir, 'yara\\windows\\vs2017\\packages\\YARA.OpenSSL.{}.1.1.1\\lib'.format(arch))
+      ]
+      openssl_libraries = ['libcrypto']
+    else:
+      openssl_include_dirs = []
+      openssl_library_dirs = []
+      openssl_libraries = ['crypto']
+
     if building_for_linux:
       module.define_macros.append(('_GNU_SOURCE', '1'))
       module.define_macros.append(('USE_LINUX_PROC', '1'))
@@ -258,17 +267,32 @@ class BuildExtCommand(build_ext):
     if self.dynamic_linking:
       module.libraries.append('yara')
     else:
-      if not self.define or not ('HASH_MODULE', '1') in self.define:
-        if (has_function('MD5_Init', include_dirs=module.include_dirs, libraries=['crypto'], library_dirs=module.library_dirs) and
-            has_function('SHA256_Init', include_dirs=module.include_dirs, libraries=['crypto'], library_dirs=module.library_dirs)):
-          module.define_macros.append(('HASH_MODULE', '1'))
-          module.define_macros.append(('HAVE_LIBCRYPTO', '1'))
-          module.libraries.append('crypto')
-        elif building_for_windows:
-          module.define_macros.append(('HASH_MODULE', '1'))
-          module.define_macros.append(('HAVE_WINCRYPT_H', '1'))
-        else:
-          exclusions.append('yara/libyara/modules/hash/hash.c')
+      # Is OpenSSL available?
+      if (has_function('OpenSSL_add_all_algorithms',
+                       includes=['openssl/evp.h'],
+                       include_dirs=module.include_dirs + openssl_include_dirs,
+                       libraries=module.libraries + openssl_libraries,
+                       library_dirs=module.library_dirs + openssl_library_dirs)
+          # In case OpenSSL is being linked statically
+          or has_function('OpenSSL_add_all_algorithms',
+                       includes=['openssl/evp.h'],
+                       include_dirs=module.include_dirs + openssl_include_dirs,
+                       libraries=module.libraries + openssl_libraries + ['dl', 'pthread', 'z'],
+                       library_dirs=module.library_dirs + openssl_library_dirs)
+          ):
+        module.define_macros.append(('HASH_MODULE', '1'))
+        module.define_macros.append(('HAVE_LIBCRYPTO', '1'))
+        module.libraries.extend(openssl_libraries)
+        module.include_dirs.extend(openssl_include_dirs)
+        module.library_dirs.extend(openssl_library_dirs)
+      elif building_for_windows:
+        # OpenSSL is not available, but in Windows we can rely on Wincrypt.
+        module.define_macros.append(('HASH_MODULE', '1'))
+        module.define_macros.append(('HAVE_WINCRYPT_H', '1'))
+      else:
+        # Neither OpenSSL nor Wincrypt is available, so exclude hash.c, which
+        # requires hashing functions.
+        exclusions.append('yara/libyara/modules/hash/hash.c')
 
       if self.enable_magic:
         module.define_macros.append(('MAGIC_MODULE', '1'))
@@ -282,11 +306,6 @@ class BuildExtCommand(build_ext):
       else:
         exclusions.append('yara/libyara/modules/cuckoo/cuckoo.c')
 
-      if self.enable_dotnet:
-        module.define_macros.append(('DOTNET_MODULE', '1'))
-      else:
-        exclusions.append('yara/libyara/modules/dotnet/dotnet.c')
-
       if self.enable_dex:
         module.define_macros.append(('DEX_MODULE', '1'))
       else:
@@ -301,6 +320,9 @@ class BuildExtCommand(build_ext):
       exclusions.append('yara/libyara/modules/pb_tests/pb_tests.c')
       exclusions.append('yara/libyara/modules/pb_tests/pb_tests.pb-c.c')
 
+      # Always turn on the DOTNET module.
+      module.define_macros.append(('DOTNET_MODULE', '1'))
+
       exclusions = [os.path.normpath(x) for x in exclusions]
 
       for directory, _, files in os.walk('yara/libyara/'):
@@ -350,7 +372,7 @@ with open('README.rst', 'r', 'utf-8') as f:
 
 setup(
     name='yara-python',
-    version='4.2.0',
+    version='4.3.1',
     description='Python interface for YARA',
     long_description=readme,
     license='Apache 2.0',
@@ -371,4 +393,5 @@ setup(
     ext_modules=[Extension(
         name='yara',
         include_dirs=['yara/libyara/include', 'yara/libyara/', '.'],
+        define_macros=[('BUCKETS_128', 1), ('CHECKSUM_1B', 1)],
         sources=['yara-python.c'])])
diff --git a/tests.py b/tests.py
index 549dd35..7ddcf25 100644
--- a/tests.py
+++ b/tests.py
@@ -1,7 +1,7 @@
 #!/usr/local/bin/python
 # -*- coding: utf-8 -*-
 #
-# Copyright (c) 2007-2021. The YARA Authors. All Rights Reserved.
+# Copyright (c) 2007-2022. The YARA Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -306,11 +306,12 @@ class TestYara(unittest.TestCase):
             matches = rule.match(data=string)
             if expected_result == SUCCEED:
                 self.assertTrue(matches)
-                _, _, matching_string = matches[0].strings[0]
+                matching_string = matches[0].strings[0]
+                instance = matching_string.instances[0]
                 if sys.version_info[0] >= 3:
-                    self.assertTrue(matching_string == bytes(test[3], 'utf-8'))
+                    self.assertTrue(instance.matched_data == bytes(test[3], 'utf-8'))
                 else:
-                    self.assertTrue(matching_string == test[3])
+                    self.assertTrue(instance.matched_data == test[3])
             else:
                 self.assertFalse(matches)
 
@@ -559,9 +560,13 @@ class TestYara(unittest.TestCase):
         matches = rules.match(data='abbb')
 
         if sys.version_info[0] >= 3:
-            self.assertTrue(matches[0].strings == [(0, '$a', bytes('ab', 'utf-8'))])
+            self.assertTrue(matches[0].strings[0].identifier == '$a')
+            self.assertTrue(matches[0].strings[0].instances[0].offset == 0)
+            self.assertTrue(matches[0].strings[0].instances[0].matched_data == bytes('ab', 'utf-8'))
         else:
-            self.assertTrue(matches[0].strings == [(0, '$a', 'ab')])
+            self.assertTrue(matches[0].strings[0].identifier == '$a')
+            self.assertTrue(matches[0].strings[0].instances[0].offset == 0)
+            self.assertTrue(matches[0].strings[0].instances[0].matched_data == 'ab')
 
     def testCount(self):
 
@@ -650,6 +655,58 @@ class TestYara(unittest.TestCase):
             'rule test { strings: $a = "ssi" condition: for all i in (1..#a) : (@a[i] == 5) }',
         ], 'mississipi')
 
+    def testXorKey(self):
+
+        global rule_data
+        rule_data = None
+
+        def callback(data):
+            global rule_data
+            rule_data = data
+            return yara.CALLBACK_CONTINUE
+
+        r = yara.compile(source='rule test { strings: $a = "dummy" xor(1-2) condition: $a }')
+        r.match(data='etllxfwoo{', callback=callback)
+
+        self.assertTrue(rule_data['matches'])
+        self.assertEqual(rule_data['rule'], 'test')
+        self.assertEqual(len(rule_data['strings']), 1)
+        string = rule_data['strings'][0]
+        self.assertEqual(len(string.instances), 2)
+        self.assertEqual(string.instances[0].xor_key, 1)
+        self.assertEqual(string.instances[1].xor_key, 2)
+
+        # Make sure plaintext() works.
+        self.assertTrue(string.instances[0].plaintext() == b'dummy')
+
+    # Test that the xor_key for matched strings is 0 if the string is not an xor
+    # string. We always want to make sure this is set!
+    def testXorKeyNoXorString(self):
+
+        global rule_data
+        rule_data = None
+
+        def callback(data):
+            global rule_data
+            rule_data = data
+            return yara.CALLBACK_CONTINUE
+
+        r = yara.compile(source='rule test { strings: $a = "dummy" condition: $a }')
+        r.match(data='dummy', callback=callback)
+
+        self.assertTrue(rule_data['matches'])
+        self.assertEqual(rule_data['rule'],'test')
+        self.assertEqual(len(rule_data['strings']), 1)
+        self.assertEqual(rule_data['strings'][0].instances[0].xor_key, 0)
+
+    def testMatchedLength(self):
+        yara.set_config(max_match_data=2)
+        r = yara.compile(source='rule test { strings: $a = "dummy" condition: $a }')
+        matches  = r.match(data='dummy')
+        self.assertEqual(matches[0].strings[0].instances[0].matched_length, 5)
+        self.assertEqual(matches[0].strings[0].instances[0].matched_data, b'du')
+        yara.set_config(max_match_data=512)
+
     def testRE(self):
 
         self.assertTrueRules([
@@ -661,8 +718,8 @@ class TestYara(unittest.TestCase):
             'rule test { strings: $a = /(M|N)iss/ nocase condition: $a }',
             'rule test { strings: $a = /[M-N]iss/ nocase condition: $a }',
             'rule test { strings: $a = /(Mi|ssi)ssippi/ nocase condition: $a }',
-            'rule test { strings: $a = /ppi\tmi/ condition: $a }',
-            r'rule test { strings: $a = /ppi\.mi/ condition: $a }',
+            r'rule test { strings: $a = /ppi\tmi/ condition: $a }',
+            'rule test { strings: $a = /ppi\.mi/ condition: $a }',
             'rule test { strings: $a = /^mississippi/ fullword condition: $a }',
             'rule test { strings: $a = /mississippi.*mississippi$/s condition: $a }',
         ], 'mississippi\tmississippi.mississippi\nmississippi')
@@ -752,6 +809,28 @@ class TestYara(unittest.TestCase):
         self.assertTrue(meta['b'] == 'ñ')
         self.assertTrue(meta['c'] == 'ñ')
 
+    # This test is similar to testScanMeta, but it checks how duplicate metadata identifiers are reported in the
+    # meta dictionary of a Match object, both by default and with `allow_duplicate_metadata` enabled.
+    def testDuplicateMeta(self):
+        r = yara.compile(source="""
+        rule test {
+            meta:
+                a = 1
+                a = 2
+                b = 3
+            condition:
+                true
+        }
+        """)
+
+        # Default behaviour should produce a simple KV map and should use the 'latest' metadata value per field
+        meta = r.match(data="dummy")[0].meta
+        self.assertTrue(meta['a'] == 2 and meta['b'] == 3)
+
+        # `allow_duplicate_metadata` flag should reveal all metadata values per field as a list
+        meta = r.match(data="dummy", allow_duplicate_metadata=True)[0].meta
+        self.assertTrue(meta['a'] == [1, 2] and meta['b'] == [3])
+
     def testFilesize(self):
 
         self.assertTrueRules([
@@ -1132,6 +1211,31 @@ class TestYara(unittest.TestCase):
         self.assertTrue(warnings_callback_message.rule == "x")
         self.assertTrue(warnings_callback_message.string == "$x")
 
+    def testCompilerErrorOnWarning(self):
+        # Make sure we always throw on warnings if requested, and that warnings
+        # are accumulated.
+
+        rules = """
+        rule a { strings: $a = "A" condition: $a }
+        rule b { strings: $b = "B" condition: $b }
+        """
+
+        expected = [
+            'line 2: string "$a" may slow down scanning',
+            'line 3: string "$b" may slow down scanning',
+        ]
+
+        with self.assertRaises(yara.WarningError) as ctx:
+            yara.compile(source=rules, error_on_warning=True)
+
+        e = ctx.exception
+        self.assertListEqual(e.warnings, expected)
+
+        # Now make sure the warnings member is set if error_on_warning is not
+        # set.
+        rules = yara.compile(source=rules)
+        self.assertListEqual(rules.warnings, expected)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/yara-python.c b/yara-python.c
index 6da6a13..1d851b9 100644
--- a/yara-python.c
+++ b/yara-python.c
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2007-2013. The YARA Authors. All Rights Reserved.
+Copyright (c) 2007-2022. The YARA Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -74,7 +74,7 @@ static PyObject* YaraWarningError = NULL;
 This module allows you to apply YARA rules to files or strings.\n\
 \n\
 For complete documentation please visit:\n\
-https://plusvic.github.io/yara\n"
+https://yara.readthedocs.io/en/stable/yarapython.html\n"
 
 #if defined(_WIN32) || defined(__CYGWIN__)
 #include <string.h>
@@ -205,6 +205,231 @@ static PyTypeObject Match_Type = {
   0,                          /* tp_new */
 };
 
+// StringMatch object
+
+typedef struct
+{
+  PyObject_HEAD
+  PyObject* identifier;
+  PyObject* instances;
+  // This is not exposed directly because it contains flags that are internal
+  // to yara (eg: STRING_FLAGS_FITS_IN_ATOM) along with modifiers
+  // (eg: STRING_FLAGS_XOR).
+  uint64_t flags;
+} StringMatch;
+
+static PyMemberDef StringMatch_members[] = {
+  {
+    "identifier",
+    T_OBJECT_EX,
+    offsetof(StringMatch, identifier),
+    READONLY,
+    "Name of the matching string"
+  },
+  {
+    "instances",
+    T_OBJECT_EX,
+    offsetof(StringMatch, instances),
+    READONLY,
+    "StringMatchInstance objects of the matching string"
+  },
+  { NULL } // End marker
+};
+
+static PyObject* StringMatch_NEW(
+    const char* identifier,
+    uint64_t flags,
+    PyObject* instance_list);
+
+static void StringMatch_dealloc(
+  PyObject* self);
+
+static PyObject* StringMatch_repr(
+    PyObject* self);
+
+static PyObject* StringMatch_getattro(
+    PyObject* self,
+    PyObject* name);
+
+static Py_hash_t StringMatch_hash(
+    PyObject* self);
+
+static PyObject* StringMatch_is_xor(
+    PyObject* self,
+    PyObject* args);
+
+
+static PyMethodDef StringMatch_methods[] =
+{
+  {
+    "is_xor",
+    (PyCFunction) StringMatch_is_xor,
+    METH_NOARGS,
+    "Return true if a string has the xor modifier"
+  },
+  { NULL },
+};
+
+static PyTypeObject StringMatch_Type = {
+  PyVarObject_HEAD_INIT(NULL, 0)
+  "yara.StringMatch",               /*tp_name*/
+  sizeof(StringMatch),              /*tp_basicsize*/
+  0,                                /*tp_itemsize*/
+  (destructor)StringMatch_dealloc,  /*tp_dealloc*/
+  0,                                /*tp_print*/
+  0,                                /*tp_getattr*/
+  0,                                /*tp_setattr*/
+  0,                                /*tp_compare*/
+  StringMatch_repr,                 /*tp_repr*/
+  0,                                /*tp_as_number*/
+  0,                                /*tp_as_sequence*/
+  0,                                /*tp_as_mapping*/
+  StringMatch_hash,                 /*tp_hash */
+  0,                                /*tp_call*/
+  0,                                /*tp_str*/
+  StringMatch_getattro,             /*tp_getattro*/
+  0,                                /*tp_setattro*/
+  0,                                /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+  "StringMatch class",              /* tp_doc */
+  0,                                /* tp_traverse */
+  0,                                /* tp_clear */
+  0,                                /* tp_richcompare */ // XXX: Implement richcompare?
+  0,                                /* tp_weaklistoffset */
+  0,                                /* tp_iter */
+  0,                                /* tp_iternext */
+  StringMatch_methods,              /* tp_methods */
+  StringMatch_members,              /* tp_members */
+  0,                                /* tp_getset */
+  0,                                /* tp_base */
+  0,                                /* tp_dict */
+  0,                                /* tp_descr_get */
+  0,                                /* tp_descr_set */
+  0,                                /* tp_dictoffset */
+  0,                                /* tp_init */
+  0,                                /* tp_alloc */
+  0,                                /* tp_new */
+};
+
+// StringMatchInstance object
+
+typedef struct
+{
+  PyObject_HEAD
+  PyObject* offset;
+  PyObject* matched_data;
+  PyObject* matched_length;
+  PyObject* xor_key;
+} StringMatchInstance;
+
+static PyMemberDef StringMatchInstance_members[] = {
+  {
+    "offset",
+    T_OBJECT_EX,
+    offsetof(StringMatchInstance, offset),
+    READONLY,
+    "Offset of the matched data"
+  },
+  {
+    "matched_data",
+    T_OBJECT_EX,
+    offsetof(StringMatchInstance, matched_data),
+    READONLY,
+    "Matched data"
+  },
+  {
+    "matched_length",
+    T_OBJECT_EX,
+    offsetof(StringMatchInstance, matched_length),
+    READONLY,
+    "Length of matched data"
+  },
+  {
+    "xor_key",
+    T_OBJECT_EX,
+    offsetof(StringMatchInstance, xor_key),
+    READONLY,
+    "XOR key found for xor strings"
+  },
+  { NULL } // End marker
+};
+
+static PyObject* StringMatchInstance_NEW(
+    uint64_t offset,
+    PyObject* matched_data,
+    int32_t match_length,
+    uint8_t xor_key);
+
+static void StringMatchInstance_dealloc(
+  PyObject* self);
+
+static PyObject* StringMatchInstance_repr(
+    PyObject* self);
+
+static PyObject* StringMatchInstance_getattro(
+    PyObject* self,
+    PyObject* name);
+
+static Py_hash_t StringMatchInstance_hash(
+    PyObject* self);
+
+static PyObject* StringMatchInstance_plaintext(
+    PyObject* self,
+    PyObject* args);
+
+
+static PyMethodDef StringMatchInstance_methods[] =
+{
+  {
+    "plaintext",
+    (PyCFunction) StringMatchInstance_plaintext,
+    METH_NOARGS,
+    "Return matched data after xor key applied."
+  },
+  { NULL },
+};
+
+static PyTypeObject StringMatchInstance_Type = {
+  PyVarObject_HEAD_INIT(NULL, 0)
+  "yara.StringMatchInstance",               /*tp_name*/
+  sizeof(StringMatchInstance),              /*tp_basicsize*/
+  0,                                        /*tp_itemsize*/
+  (destructor)StringMatchInstance_dealloc,  /*tp_dealloc*/
+  0,                                        /*tp_print*/
+  0,                                        /*tp_getattr*/
+  0,                                        /*tp_setattr*/
+  0,                                        /*tp_compare*/
+  StringMatchInstance_repr,                 /*tp_repr*/
+  0,                                        /*tp_as_number*/
+  0,                                        /*tp_as_sequence*/
+  0,                                        /*tp_as_mapping*/
+  StringMatchInstance_hash,                 /*tp_hash */
+  0,                                        /*tp_call*/
+  0,                                        /*tp_str*/
+  StringMatchInstance_getattro,             /*tp_getattro*/
+  0,                                        /*tp_setattro*/
+  0,                                        /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+  "StringMatchInstance class",              /* tp_doc */
+  0,                                        /* tp_traverse */
+  0,                                        /* tp_clear */
+  0,                                        /* tp_richcompare */ // XXX: Implement richcompare?
+  0,                                        /* tp_weaklistoffset */
+  0,                                        /* tp_iter */
+  0,                                        /* tp_iternext */
+  StringMatchInstance_methods,              /* tp_methods */
+  StringMatchInstance_members,              /* tp_members */
+  0,                                        /* tp_getset */
+  0,                                        /* tp_base */
+  0,                                        /* tp_dict */
+  0,                                        /* tp_descr_get */
+  0,                                        /* tp_descr_set */
+  0,                                        /* tp_dictoffset */
+  0,                                        /* tp_init */
+  0,                                        /* tp_alloc */
+  0,                                        /* tp_new */
+};
+
 // Rule object
 
 typedef struct
@@ -316,6 +541,7 @@ typedef struct
 {
   PyObject_HEAD
   PyObject* externals;
+  PyObject* warnings;
   YR_RULES* rules;
   YR_RULE* iter_current_rule;
 } Rules;
@@ -347,6 +573,17 @@ static PyObject* Rules_getattro(
 static PyObject* Rules_next(
     PyObject* self);
 
+static PyMemberDef Rules_members[] = {
+  {
+    "warnings",
+    T_OBJECT_EX,
+    offsetof(Rules, warnings),
+    READONLY,
+    "List of compiler warnings"
+  },
+  { NULL } // End marker
+};
+
 static PyMethodDef Rules_methods[] =
 {
   {
@@ -399,7 +636,7 @@ static PyTypeObject Rules_Type = {
   PyObject_SelfIter,          /* tp_iter */
   (iternextfunc) Rules_next,  /* tp_iternext */
   Rules_methods,              /* tp_methods */
-  0,                          /* tp_members */
+  Rules_members,              /* tp_members */
   0,                          /* tp_getset */
   0,                          /* tp_base */
   0,                          /* tp_dict */
@@ -420,6 +657,7 @@ typedef struct _CALLBACK_DATA
   PyObject* warnings_callback;
   PyObject* console_callback;
   int which;
+  bool allow_duplicate_metadata;
 
 } CALLBACK_DATA;
 
@@ -873,7 +1111,6 @@ _exit:
 #define CALLBACK_NON_MATCHES 0x02
 #define CALLBACK_ALL CALLBACK_MATCHES | CALLBACK_NON_MATCHES
 
-
 int yara_callback(
     YR_SCAN_CONTEXT* context,
     int message,
@@ -888,12 +1125,13 @@ int yara_callback(
   const char* tag;
 
   PyObject* tag_list = NULL;
+  PyObject* string_instance_list = NULL;
   PyObject* string_list = NULL;
   PyObject* meta_list = NULL;
+  PyObject* string_match_instance = NULL;
   PyObject* match;
   PyObject* callback_dict;
   PyObject* object;
-  PyObject* tuple;
   PyObject* matches = ((CALLBACK_DATA*) user_data)->matches;
   PyObject* callback = ((CALLBACK_DATA*) user_data)->callback;
   PyObject* callback_result;
@@ -975,27 +1213,83 @@ int yara_callback(
     else
       object = PY_STRING(meta->string);
 
-    PyDict_SetItemString(meta_list, meta->identifier, object);
-    Py_DECREF(object);
+    if (((CALLBACK_DATA*) user_data)->allow_duplicate_metadata){
+      // Check if we already have an array under this key
+      PyObject* existing_item = PyDict_GetItemString(meta_list, meta->identifier);
+      // Append object to existing list
+      if (existing_item)
+        PyList_Append(existing_item, object);
+      else{
+        // Otherwise, instantiate array and append object as first item
+        PyObject* new_list = PyList_New(0);
+        PyList_Append(new_list, object);
+        PyDict_SetItemString(meta_list, meta->identifier, new_list);
+        Py_DECREF(new_list);
+      }
+    }
+    else{
+      PyDict_SetItemString(meta_list, meta->identifier, object);
+    }
+    Py_DECREF(object);  // the list/dict took its own reference on every path
   }
 
   yr_rule_strings_foreach(rule, string)
   {
+    // If this string is not a match, skip it. We have to check for this here
+    // and not rely on it in yr_string_matches_foreach macro because we need
+    // to create the string match instance list before we make the items that
+    // go in it.
+    if (context->matches[string->idx].head == NULL)
+      continue;
+
+    string_instance_list = PyList_New(0);
+
+    if (string_instance_list == NULL)
+    {
+        PyErr_Format(PyExc_TypeError, "out of memory");
+        return CALLBACK_ERROR;
+    }
+
+
     yr_string_matches_foreach(context, string, m)
     {
       object = PyBytes_FromStringAndSize((char*) m->data, m->data_length);
 
-      tuple = Py_BuildValue(
-          "(L,s,O)",
+      string_match_instance = StringMatchInstance_NEW(
           m->base + m->offset,
-          string->identifier,
-          object);
+          object,
+          m->match_length,
+          m->xor_key);
+
+      if (string_match_instance == NULL)
+      {
+        Py_DECREF(object);
+        PyErr_Format(PyExc_TypeError, "out of memory");
+        return CALLBACK_ERROR;
+      }
 
-      PyList_Append(string_list, tuple);
+      PyList_Append(string_instance_list, string_match_instance);
 
       Py_DECREF(object);
-      Py_DECREF(tuple);
+      Py_DECREF(string_match_instance);
     }
+
+    object = StringMatch_NEW(
+        string->identifier,
+        string->flags,
+        string_instance_list);
+
+    if (object == NULL)
+    {
+        PyErr_Format(PyExc_TypeError, "out of memory");
+        return CALLBACK_ERROR;
+    }
+
+
+    Py_DECREF(string_instance_list);
+
+    PyList_Append(string_list, object);
+    Py_DECREF(object);
   }
 
   if (message == CALLBACK_MSG_RULE_MATCHING)
@@ -1486,6 +1780,176 @@ static Py_hash_t Match_hash(
   return PyObject_Hash(match->rule) + PyObject_Hash(match->ns);
 }
 
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+// Allocates a StringMatch for `identifier`, storing `flags` and a new
+// reference to `instance_list`. Yields NULL if the allocation fails.
+static PyObject* StringMatch_NEW(
+    const char* identifier,
+    uint64_t flags,
+    PyObject* instance_list)
+{
+  StringMatch* match = PyObject_NEW(StringMatch, &StringMatch_Type);
+
+  if (match == NULL)
+    return NULL;
+
+  Py_INCREF(instance_list);
+  match->instances = instance_list;
+  match->flags = flags;
+  match->identifier = PY_STRING(identifier);
+  return (PyObject*) match;
+}
+
+
+// Deallocator: releases the references a StringMatch holds, then the object.
+static void StringMatch_dealloc(PyObject* self)
+{
+  StringMatch* object = (StringMatch*) self;
+
+  // StringMatch_NEW stores PY_STRING()'s result unchecked, so tolerate NULL.
+  Py_XDECREF(object->identifier);
+  Py_DECREF(object->instances);
+  PyObject_Del(self);
+}
+
+
+// The identifier doubles as the repr; returns a new reference to it.
+static PyObject* StringMatch_repr(PyObject* self)
+{
+  PyObject* name = ((StringMatch*) self)->identifier;
+  Py_INCREF(name);
+  return name;
+}
+
+
+// Attribute lookup simply defers to PyObject_GenericGetAttr.
+static PyObject* StringMatch_getattro(PyObject* self, PyObject* name)
+{
+  PyObject* value = PyObject_GenericGetAttr(self, name);
+  return value;
+}
+
+
+// The hash is derived from the identifier alone. Identifiers can repeat across
+// rules, and anonymous strings exist too, so equal hashes do not imply the
+// same string: be careful when relying on it!
+static Py_hash_t StringMatch_hash(PyObject* self)
+{
+  StringMatch* match = (StringMatch*) self;
+  return PyObject_Hash(match->identifier);
+}
+
+
+// Returns True when STRING_FLAGS_XOR is set in this match's flags, False
+// otherwise. `args` is not used.
+static PyObject* StringMatch_is_xor(
+    PyObject* self,
+    PyObject* args)
+{
+  const StringMatch* match = (const StringMatch*) self;
+  return PyBool_FromLong((match->flags & STRING_FLAGS_XOR) != 0);
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+// Builds a StringMatchInstance. Takes its own reference to matched_data, so
+// the caller keeps the one it has. Returns NULL if allocation fails.
+static PyObject* StringMatchInstance_NEW(
+    uint64_t offset,
+    PyObject* matched_data,
+    int32_t match_length,
+    uint8_t xor_key)
+{
+  StringMatchInstance* object = PyObject_NEW(StringMatchInstance, &StringMatchInstance_Type);
+  if (object != NULL)
+  {
+    Py_INCREF(matched_data);
+    object->matched_data = matched_data;
+    // offset is unsigned; a signed conversion turns values >= 2^63 negative.
+    object->offset = PyLong_FromUnsignedLongLong(offset);
+    object->matched_length = PyLong_FromLong(match_length);
+    object->xor_key = PyLong_FromUnsignedLong((uint32_t) xor_key);
+  }
+  return (PyObject*) object;
+}
+
+
+// Deallocator: drops every reference taken in StringMatchInstance_NEW.
+static void StringMatchInstance_dealloc(PyObject* self)
+{
+  StringMatchInstance* object = (StringMatchInstance*) self;
+  // X-variants: the PyLong_From* results are stored without a NULL check.
+  Py_XDECREF(object->offset);
+  Py_XDECREF(object->matched_data);
+  Py_XDECREF(object->matched_length);  // previously leaked
+  Py_XDECREF(object->xor_key);
+  PyObject_Del(self);
+}
+
+
+// Decodes the matched bytes as UTF-8 using the "backslashreplace" handler.
+static PyObject* StringMatchInstance_repr(PyObject* self)
+{
+  PyObject* data = ((StringMatchInstance*) self)->matched_data;
+  return PyCodec_Decode(data, "utf-8", "backslashreplace");
+}
+
+
+// Attribute access is delegated unchanged to PyObject_GenericGetAttr.
+static PyObject* StringMatchInstance_getattro(PyObject* self, PyObject* name)
+{
+  PyObject* attr = PyObject_GenericGetAttr(self, name);
+  return attr;
+}
+
+
+// Hash of the matched bytes alone; offset, length and xor key play no part.
+static Py_hash_t StringMatchInstance_hash(PyObject* self)
+{
+  return PyObject_Hash(((StringMatchInstance*) self)->matched_data);
+}
+
+
+// Returns the matched bytes with the xor key undone, as a new reference.
+// Returns NULL with a Python exception set on failure. `args` is unused.
+static PyObject* StringMatchInstance_plaintext(
+    PyObject* self,
+    PyObject* args)
+{
+  char* pb;
+  Py_ssize_t length;
+
+  StringMatchInstance* instance = (StringMatchInstance*) self;
+  uint64_t xor_key = PyLong_AsUnsignedLongLong(instance->xor_key);
+  if (xor_key == 0)
+  {
+    // Not xored: hand back the stored bytes. The caller owns the result, so
+    // it must get its own reference rather than the borrowed pointer.
+    Py_INCREF(instance->matched_data);
+    return instance->matched_data;
+  }
+
+  if (PyBytes_AsStringAndSize(instance->matched_data, &pb, &length) == -1)
+    return NULL;
+
+  // pb points into the bytes object and must not be modified. Create a new,
+  // uninitialized bytes object of the same size and write the result into it.
+  PyObject* object = PyBytes_FromStringAndSize(NULL, length);
+  if (object == NULL)
+    return NULL;
+
+  char* out = PyBytes_AS_STRING(object);
+  for (Py_ssize_t i = 0; i < length; i++)
+    out[i] = (char) (((uint8_t) pb[i]) ^ (uint8_t) xor_key);
+  return object;
+}
+
+
 ////////////////////////////////////////////////////////////////////////////////
 
 
@@ -1517,6 +1981,7 @@ static Rules* Rules_NEW(void)
   {
     rules->rules = NULL;
     rules->externals = NULL;
+    rules->warnings = NULL;
   }
 
   return rules;
@@ -1528,6 +1993,7 @@ static void Rules_dealloc(
   Rules* object = (Rules*) self;
 
   Py_XDECREF(object->externals);
+  Py_XDECREF(object->warnings);
 
   if (object->rules != NULL)
     yr_rules_destroy(object->rules);
@@ -1580,8 +2046,9 @@ static PyObject* Rules_next(
       else
         object = PY_STRING(meta->string);
 
-      PyDict_SetItemString(meta_list, meta->identifier, object);
-      Py_DECREF(object);
+        PyDict_SetItemString(meta_list, meta->identifier, object);
+        Py_DECREF(object);
+
     }
 
     rule->global = PyBool_FromLong(rules->iter_current_rule->flags & RULE_FLAGS_GLOBAL);
@@ -1609,7 +2076,7 @@ static PyObject* Rules_match(
       "filepath", "pid", "data", "externals",
       "callback", "fast", "timeout", "modules_data",
       "modules_callback", "which_callbacks", "warnings_callback",
-      "console_callback", NULL
+      "console_callback", "allow_duplicate_metadata", NULL
       };
 
   char* filepath = NULL;
@@ -1634,11 +2101,12 @@ static PyObject* Rules_match(
   callback_data.warnings_callback = NULL;
   callback_data.console_callback = NULL;
   callback_data.which = CALLBACK_ALL;
+  callback_data.allow_duplicate_metadata = false;
 
   if (PyArg_ParseTupleAndKeywords(
         args,
         keywords,
-        "|sis*OOOiOOiOO",
+        "|sis*OOOiOOiOOb",
         kwlist,
         &filepath,
         &pid,
@@ -1651,7 +2119,8 @@ static PyObject* Rules_match(
         &callback_data.modules_callback,
         &callback_data.which,
         &callback_data.warnings_callback,
-        &callback_data.console_callback))
+        &callback_data.console_callback,
+        &callback_data.allow_duplicate_metadata))
   {
     if (filepath == NULL && data.buf == NULL && pid == -1)
     {
@@ -1715,6 +2184,9 @@ static PyObject* Rules_match(
       }
     }
 
+    if (callback_data.allow_duplicate_metadata == NULL)
+      callback_data.allow_duplicate_metadata = false;
+
     if (yr_scanner_create(object->rules, &scanner) != 0)
     {
       return PyErr_Format(
@@ -2435,8 +2907,6 @@ static PyObject* yara_compile(
       PyErr_SetObject(YaraWarningError, warnings);
     }
 
-    Py_DECREF(warnings);
-
     if (PyErr_Occurred() == NULL)
     {
       rules = Rules_NEW();
@@ -2451,6 +2921,7 @@ static PyObject* yara_compile(
         {
           rules->rules = yara_rules;
           rules->iter_current_rule = rules->rules->rules_table;
+          rules->warnings = warnings;
 
           if (externals != NULL && externals != Py_None)
             rules->externals = PyDict_Copy(externals);
@@ -2641,6 +3112,7 @@ static PyMethodDef yara_methods[] = {
 static PyObject* YaraWarningError_getwarnings(PyObject *self, void* closure)
 {
   PyObject *args = PyObject_GetAttrString(self, "args");
+
   if (!args) {
     return NULL;
   }
@@ -2648,6 +3120,7 @@ static PyObject* YaraWarningError_getwarnings(PyObject *self, void* closure)
   PyObject* ret = PyTuple_GetItem(args, 0);
   Py_XINCREF(ret);
   Py_XDECREF(args);
+
   return ret;
 }
 
@@ -2710,11 +3183,19 @@ MOD_INIT(yara)
   if (PyType_Ready(&Match_Type) < 0)
     return MOD_ERROR_VAL;
 
+  if (PyType_Ready(&StringMatch_Type) < 0)
+    return MOD_ERROR_VAL;
+
+  if (PyType_Ready(&StringMatchInstance_Type) < 0)
+    return MOD_ERROR_VAL;
+
   PyStructSequence_InitType(&RuleString_Type, &RuleString_Desc);
 
   PyModule_AddObject(m, "Rule", (PyObject*) &Rule_Type);
   PyModule_AddObject(m, "Rules", (PyObject*) &Rules_Type);
   PyModule_AddObject(m, "Match",  (PyObject*) &Match_Type);
+  PyModule_AddObject(m, "StringMatch",  (PyObject*) &StringMatch_Type);
+  PyModule_AddObject(m, "StringMatchInstance",  (PyObject*) &StringMatchInstance_Type);
 
   PyModule_AddObject(m, "Error", YaraError);
   PyModule_AddObject(m, "SyntaxError", YaraSyntaxError);
@@ -2727,6 +3208,30 @@ MOD_INIT(yara)
     return MOD_ERROR_VAL;
   }
 
+  PyObject* module_names_list = PyList_New(0);
+
+  if (module_names_list == NULL)
+  {
+    PyErr_SetString(YaraError, "module list error");
+    return MOD_ERROR_VAL;
+  }
+
+  for (YR_MODULE* module = yr_modules_get_table(); module->name != NULL; module++)
+  {
+    PyObject* module_name = PY_STRING(module->name);
+    if (module_name == NULL)
+    {
+      PyErr_SetString(YaraError, "module name error");
+      return MOD_ERROR_VAL;
+    }
+    if (PyList_Append(module_names_list, module_name) < 0)
+    {
+      PyErr_SetString(YaraError, "module name error");
+      return MOD_ERROR_VAL;
+    }
+  }
+  PyModule_AddObject(m, "modules", module_names_list);
+
   Py_AtExit(finalize);
 
   return MOD_SUCCESS_VAL(m);

More details

Full run details

Historical runs