# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0; you may not use this file except in compliance with the Elastic License
# 2.0.
"""Functions to support and interact with Kibana integrations."""
import fnmatch
import gzip
import json
import re
from collections import defaultdict, OrderedDict
from pathlib import Path
from typing import Generator, List, Optional, Tuple, Union

import requests
import yaml
from marshmallow import EXCLUDE, Schema, fields, post_load
from semver import Version

import kql

from . import ecs
from .beats import flatten_ecs_schema
from .config import load_current_package_version
from .schemas import definitions
from .utils import cached, get_etc_path, read_gzip, unzip

MANIFEST_FILE_PATH = get_etc_path('integration-manifests.json.gz')
NUM_LATEST_RULE_VERSIONS = 1
SCHEMA_FILE_PATH = get_etc_path('integration-schemas.json.gz')

_notified_integrations = set()


@cached
def load_integrations_manifests() -> dict:
    """Load the consolidated integrations manifest."""
    return json.loads(read_gzip(get_etc_path('integration-manifests.json.gz')))


@cached
def load_integrations_schemas() -> dict:
    """Load the consolidated integrations schemas."""
    return json.loads(read_gzip(get_etc_path('integration-schemas.json.gz')))
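
# Usage sketch (assumes the gzipped manifest/schema files have already been
# built under etc/; "aws" and "1.0.0" are hypothetical keys):
#
#   manifests = load_integrations_manifests()
#   versions = list(manifests["aws"])      # available package versions
#   schemas = load_integrations_schemas()
#   fields = schemas["aws"]["1.0.0"]       # integration name -> field type map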


class IntegrationManifestSchema(Schema):
    name = fields.Str(required=True)
    version = fields.Str(required=True)
    release = fields.Str(required=True)
    description = fields.Str(required=True)
    download = fields.Str(required=True)
    conditions = fields.Dict(required=True)
    policy_templates = fields.List(fields.Dict)
    owner = fields.Dict(required=False)

    @post_load
    def transform_policy_template(self, data, **kwargs):
        if "policy_templates" in data:
            data["policy_templates"] = [policy["name"] for policy in data["policy_templates"]]
        return data
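
# Sketch of how the schema is applied (made-up manifest values): loading with
# unknown=EXCLUDE drops unlisted keys, and the post_load hook reduces each
# policy template dict to its name:
#
#   raw = {"name": "aws", "version": "1.0.0", "release": "ga",
#          "description": "...", "download": "/epr/aws/aws-1.0.0.zip",
#          "conditions": {"kibana": {"version": "^8.0.0"}},
#          "policy_templates": [{"name": "cloudtrail", "title": "AWS CloudTrail"}]}
#   validated = IntegrationManifestSchema(unknown=EXCLUDE).load(raw)
#   validated["policy_templates"]  # -> ["cloudtrail"]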


def build_integrations_manifest(overwrite: bool, rule_integrations: list = [],
                                integration: str = None, prerelease: bool = False) -> None:
    """Build a new local copy of integration-manifests.json.gz from the Elastic Package Registry."""

    def write_manifests(integrations: dict) -> None:
        manifest_file = gzip.open(MANIFEST_FILE_PATH, "w+")
        manifest_file_bytes = json.dumps(integrations).encode("utf-8")
        manifest_file.write(manifest_file_bytes)
        manifest_file.close()

    if overwrite and MANIFEST_FILE_PATH.exists():
        MANIFEST_FILE_PATH.unlink()

    final_integration_manifests = {integration: {} for integration in rule_integrations} or {integration: {}}
    rule_integrations = rule_integrations or [integration]

    for integration in rule_integrations:
        integration_manifests = get_integration_manifests(integration, prerelease=prerelease)
        for manifest in integration_manifests:
            validated_manifest = IntegrationManifestSchema(unknown=EXCLUDE).load(manifest)
            package_version = validated_manifest.pop("version")
            final_integration_manifests[integration][package_version] = validated_manifest

    if overwrite and rule_integrations:
        write_manifests(final_integration_manifests)
    elif integration and not overwrite:
        # Merge the single integration into the existing manifest file
        manifest_file = gzip.open(MANIFEST_FILE_PATH, "rb")
        manifest_file_contents = json.loads(manifest_file.read().decode("utf-8"))
        manifest_file.close()
        manifest_file_contents[integration] = final_integration_manifests[integration]
        write_manifests(manifest_file_contents)

    print(f"final integrations manifests dumped: {MANIFEST_FILE_PATH}")


def build_integrations_schemas(overwrite: bool, integration: str = None) -> None:
    """Builds a new local copy of integration-schemas.json.gz from EPR integrations."""
    saved_integration_schemas = {}

    # Check if the file already exists and handle accordingly
    if overwrite and SCHEMA_FILE_PATH.exists():
        SCHEMA_FILE_PATH.unlink()
        final_integration_schemas = {}
    elif SCHEMA_FILE_PATH.exists():
        final_integration_schemas = load_integrations_schemas()
    else:
        final_integration_schemas = {}

    # Load the integration manifests
    integration_manifests = load_integrations_manifests()

    # If a single integration is specified, only process that integration
    if integration:
        if integration in integration_manifests:
            integration_manifests = {integration: integration_manifests[integration]}
        else:
            raise ValueError(f"Integration {integration} not found in manifest.")

    # Loop through the packages and versions
    for package, versions in integration_manifests.items():
        print(f"processing {package}")
        final_integration_schemas.setdefault(package, {})
        for version, manifest in versions.items():
            if package in saved_integration_schemas and version in saved_integration_schemas[package]:
                continue

            # Download the zip file
            download_url = f"https://epr.elastic.co{manifest['download']}"
            response = requests.get(download_url, timeout=10)
            response.raise_for_status()

            # Update the final integration schemas
            final_integration_schemas[package].update({version: {}})

            # Open the zip file
            with unzip(response.content) as zip_ref:
                for file in zip_ref.namelist():
                    file_data_bytes = zip_ref.read(file)

                    # Check if the file is a field-definition file
                    if fnmatch.fnmatch(file, '*/fields/*.yml'):
                        integration_name = Path(file).parent.parent.name
                        final_integration_schemas[package][version].setdefault(integration_name, {})
                        schema_fields = yaml.safe_load(file_data_bytes)

                        # Parse the schema and add to the integration schemas
                        data = flatten_ecs_schema(schema_fields)
                        flat_data = {field['name']: field['type'] for field in data}
                        final_integration_schemas[package][version][integration_name].update(flat_data)

                    # Add machine learning jobs to the schema
                    if package in list(map(str.lower, definitions.MACHINE_LEARNING_PACKAGES)):
                        if fnmatch.fnmatch(file, '*/ml_module/*ml.json'):
                            ml_module = json.loads(file_data_bytes)
                            job_ids = [job['id'] for job in ml_module['attributes']['jobs']]
                            final_integration_schemas[package][version]['jobs'] = job_ids

                    del file_data_bytes

    # Write the final integration schemas to disk
    with gzip.open(SCHEMA_FILE_PATH, "w") as schema_file:
        schema_file_bytes = json.dumps(final_integration_schemas).encode("utf-8")
        schema_file.write(schema_file_bytes)

    print(f"final integrations schemas dumped: {SCHEMA_FILE_PATH}")


def find_least_compatible_version(package: str, integration: str,
                                  current_stack_version: str, packages_manifest: dict) -> str:
    """Finds the least compatible version for the specified integration based on the stack version supplied."""
    integration_manifests = {k: v for k, v in sorted(packages_manifest[package].items(),
                                                     key=lambda x: Version.parse(x[0]))}
    current_stack_version = Version.parse(current_stack_version, optional_minor_and_patch=True)

    # Collect the major versions present in the manifests, newest first
    major_versions = sorted({Version.parse(manifest_version).major
                             for manifest_version in integration_manifests}, reverse=True)
    for max_major in major_versions:
        major_integration_manifests = \
            {k: v for k, v in integration_manifests.items() if Version.parse(k).major == max_major}

        # Iterate through the manifests of this major in ascending order and
        # return the lowest version that supports the current stack version
        for version, manifest in OrderedDict(sorted(major_integration_manifests.items(),
                                                    key=lambda x: Version.parse(x[0]))).items():
            compatible_versions = re.sub(r"\>|\<|\=|\^", "", manifest["conditions"]["kibana"]["version"]).split(" || ")
            for kibana_ver in compatible_versions:
                kibana_ver = Version.parse(kibana_ver)
                # Check that the versions share the same major
                if kibana_ver.major == current_stack_version.major and kibana_ver <= current_stack_version:
                    return f"^{version}"

    raise ValueError(f"no compatible version for integration {package}:{integration}")


def find_latest_compatible_version(package: str, integration: str,
                                   rule_stack_version: Version,
                                   packages_manifest: dict) -> Union[None, Tuple[str, str]]:
    """Finds the latest compatible version for the specified integration based on the stack version supplied."""
    if not package:
        raise ValueError("Package must be specified")

    package_manifest = packages_manifest.get(package)
    if package_manifest is None:
        raise ValueError(f"Package {package} not found in manifest.")

    # Convert the dict keys (version numbers) to Version objects for proper sorting (descending)
    integration_manifests = sorted(package_manifest.items(), key=lambda x: Version.parse(x[0]), reverse=True)
    notice = ""

    for version, manifest in integration_manifests:
        kibana_conditions = manifest.get("conditions", {}).get("kibana", {})
        version_requirement = kibana_conditions.get("version")
        if not version_requirement:
            raise ValueError(f"Manifest for {package}:{integration} version {version} is missing conditions.")

        compatible_versions = re.sub(r"\>|\<|\=|\^", "", version_requirement).split(" || ")
        if not compatible_versions:
            raise ValueError(f"Manifest for {package}:{integration} version {version} is missing compatible versions")

        highest_compatible_version = Version.parse(max(compatible_versions, key=lambda x: Version.parse(x)))

        if highest_compatible_version > rule_stack_version:
            # Generate a notice that a later integration version is available
            integration = f" {integration.strip()}" if integration else ""
            notice = (f"There is a new integration {package}{integration} version {version} available!",
                      f"Update the rule min_stack version from {rule_stack_version} to "
                      f"{highest_compatible_version} if using new features in this latest version.")

        if highest_compatible_version.major == rule_stack_version.major:
            return version, notice

        # Check for rules that cross majors
        for compatible_version in compatible_versions:
            if Version.parse(compatible_version) <= rule_stack_version:
                return version, notice

    raise ValueError(f"no compatible version for integration {package}:{integration}")


def get_integration_manifests(integration: str, prerelease: Optional[bool] = False,
                              kibana_version: Optional[str] = "") -> list:
    """Queries the Elastic Package Registry and returns the combined manifests per package version."""
    epr_search_url = "https://epr.elastic.co/search"
    prerelease = "true" if prerelease else "false"

    # Search parameters are documented at https://github.com/elastic/package-registry
    epr_search_parameters = {"package": f"{integration}", "prerelease": prerelease,
                             "all": "true", "include_policy_templates": "true"}
    if kibana_version:
        epr_search_parameters["kibana.version"] = kibana_version

    epr_search_response = requests.get(epr_search_url, params=epr_search_parameters, timeout=10)
    epr_search_response.raise_for_status()
    manifests = epr_search_response.json()

    if not manifests:
        raise ValueError(f"EPR search for {integration} integration package returned empty list")

    sorted_manifests = sorted(manifests, key=lambda p: Version.parse(p["version"]), reverse=True)
    print(f"loaded {integration} manifests from the following package versions: "
          f"{[manifest['version'] for manifest in sorted_manifests]}")
    return manifests
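
# Usage sketch (performs a live EPR query):
#
#   manifests = get_integration_manifests("aws", prerelease=False, kibana_version="8.3.0")
#   manifests[0]["version"]  # a version string such as "1.16.1"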


def find_latest_integration_version(integration: str, maturity: str, stack_version: Version) -> Version:
    """Finds the latest integration version based on maturity and stack version."""
    prerelease = maturity != "ga"
    existing_pkgs = get_integration_manifests(integration, prerelease, str(stack_version))
    if maturity == "ga":
        existing_pkgs = [pkg for pkg in existing_pkgs if not Version.parse(pkg["version"]).prerelease]
    if maturity == "beta":
        existing_pkgs = [pkg for pkg in existing_pkgs if Version.parse(pkg["version"]).prerelease]
    return max(Version.parse(pkg["version"]) for pkg in existing_pkgs)
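
# Usage sketch (queries EPR): the newest GA release that supports the stack:
#
#   find_latest_integration_version("aws", "ga", Version.parse("8.3.0"))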


def get_integration_schema_data(data, meta, package_integrations: dict) -> Generator[dict, None, None]:
    """Iterates over the specified package integrations and yields a combined schema per stack version."""
    # Lazy import to avoid a circular import
    from .rule import (  # pylint: disable=import-outside-toplevel
        ESQLRuleData, QueryRuleData, RuleMeta)

    data: QueryRuleData = data
    meta: RuleMeta = meta

    packages_manifest = load_integrations_manifests()
    integrations_schemas = load_integrations_schemas()

    # Validate the query against related integration fields
    if isinstance(data, (QueryRuleData, ESQLRuleData)) and data.language != 'lucene' \
            and meta.maturity == "production":
        for stack_version, mapping in meta.get_validation_stack_versions().items():
            ecs_version = mapping['ecs']
            endgame_version = mapping['endgame']
            ecs_schema = ecs.flatten_multi_fields(ecs.get_schema(ecs_version, name='ecs_flat'))

            for pk_int in package_integrations:
                package = pk_int["package"]
                integration = pk_int["integration"]

                # Use the minimum stack version from the package, not the rule
                min_stack = meta.min_stack_version or load_current_package_version()
                min_stack = Version.parse(min_stack, optional_minor_and_patch=True)

                # Extract the integration schema fields
                integration_schema, package_version = get_integration_schema_fields(
                    integrations_schemas, package, integration, min_stack, packages_manifest, ecs_schema, data)

                data = {"schema": integration_schema, "package": package, "integration": integration,
                        "stack_version": stack_version, "ecs_version": ecs_version,
                        "package_version": package_version, "endgame_version": endgame_version}
                yield data


def get_integration_schema_fields(integrations_schemas: dict, package: str, integration: str,
                                  min_stack: Version, packages_manifest: dict,
                                  ecs_schema: dict, data: dict) -> Tuple[dict, str]:
    """Extracts the integration field schema for the given package integration."""
    package_version, notice = find_latest_compatible_version(package, integration, min_stack, packages_manifest)
    notify_user_if_update_available(data, notice, integration)
    schema = collect_schema_fields(integrations_schemas, package, package_version, integration)
    schema.update(ecs_schema)
    integration_schema = {key: kql.parser.elasticsearch_type_family(value) for key, value in schema.items()}
    return integration_schema, package_version


def notify_user_if_update_available(data: dict, notice: list, integration: str) -> None:
    """Notifies the user if an update is available, only once per integration."""
    global _notified_integrations
    if notice and data.get("notify", False) and integration not in _notified_integrations:
        # Only warn once per integration for available upgrades
        _notified_integrations.add(integration)
        print(f"\n{data.get('name')}")
        print('\n'.join(notice))


def collect_schema_fields(integrations_schemas: dict, package: str, package_version: str,
                          integration: Optional[str] = None) -> dict:
    """Collects the schema fields for a given integration."""
    if integration is None:
        return {field: value
                for dataset in integrations_schemas[package][package_version] if dataset != "jobs"
                for field, value in integrations_schemas[package][package_version][dataset].items()}

    if integration not in integrations_schemas[package][package_version]:
        raise ValueError(f"Integration {integration} not found in package {package} version {package_version}")
    return integrations_schemas[package][package_version][integration]
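
# Usage sketch (hypothetical keys): omitting the integration merges every
# dataset's fields, skipping the special "jobs" key that holds ML job IDs:
#
#   schemas = load_integrations_schemas()
#   all_fields = collect_schema_fields(schemas, "aws", "1.0.0")
#   one_set = collect_schema_fields(schemas, "aws", "1.0.0", integration="cloudtrail")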


def parse_datasets(datasets: list, package_manifest: dict) -> List[dict]:
    """Parses datasets into packaged integrations from rule data."""
    packaged_integrations = []
    for value in sorted(datasets):
        # Clean up extra quotes pulled from the ast field
        value = value.strip('"')

        integration = 'Unknown'
        if '.' in value:
            package, integration = value.split('.', 1)
            # Handle cases where the endpoint event datasource needs to be parsed uniquely,
            # e.g. endpoint.events.network is parsed as endpoint.network
            if package == "endpoint" and "events" in integration:
                integration = integration.split('.')[1]
        else:
            package = value

        if package in list(package_manifest):
            packaged_integrations.append({"package": package, "integration": integration})
    return packaged_integrations
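
# Usage sketch: dataset values typically come from event.dataset /
# data_stream.dataset constants in a rule query:
#
#   parse_datasets(['"aws.cloudtrail"', 'endpoint.events.network'],
#                  load_integrations_manifests())
#   # -> [{'package': 'aws', 'integration': 'cloudtrail'},
#   #     {'package': 'endpoint', 'integration': 'network'}]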


class SecurityDetectionEngine:
    """Dedicated to the Security Detection Engine integration."""

    def __init__(self):
        self.epr_url = "https://epr.elastic.co/package/security_detection_engine/"

    def load_integration_assets(self, package_version: Version) -> dict:
        """Loads integration assets into memory."""
        epr_package_url = f"{self.epr_url}{str(package_version)}/"
        epr_response = requests.get(epr_package_url, timeout=10)
        epr_response.raise_for_status()
        package_obj = epr_response.json()

        zip_url = f"https://epr.elastic.co{package_obj['download']}"
        zip_response = requests.get(zip_url, timeout=10)
        zip_response.raise_for_status()

        with unzip(zip_response.content) as zip_package:
            asset_file_names = [asset for asset in zip_package.namelist() if "json" in asset]
            assets = {x.split("/")[-1].replace(".json", ""): json.loads(zip_package.read(x).decode('utf-8'))
                      for x in asset_file_names}
        return assets

    def keep_latest_versions(self, assets: dict, num_versions: int = NUM_LATEST_RULE_VERSIONS) -> dict:
        """Keeps only the latest N versions of each rule to limit historical rule versions in our release package."""
        # Group the available versions by base rule ID
        rule_versions = defaultdict(list)
        for key in assets:
            base_id, version = assets[key]["attributes"]["rule_id"], assets[key]["attributes"]["version"]
            version = int(version)  # Convert version to an integer for sorting
            rule_versions[base_id].append((version, key))

        # For each rule, sort its versions descending and keep only the latest num_versions
        filtered_assets = {}
        for base_id, versions in rule_versions.items():
            latest_versions = sorted(versions, key=lambda x: x[0], reverse=True)[:num_versions]
            for _, key in latest_versions:
                filtered_assets[key] = assets[key]

        return filtered_assets
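
# Usage sketch (hypothetical package version): trims historical rule versions
# so only the latest NUM_LATEST_RULE_VERSIONS entries per rule_id remain:
#
#   sde = SecurityDetectionEngine()
#   assets = sde.load_integration_assets(Version.parse("8.3.1"))
#   trimmed = sde.keep_latest_versions(assets)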