Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

converting graphql queries to use variables #120

Merged
merged 41 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
21e0ceb
converting two graphql queries for testing
mdfleury-wbd Apr 15, 2024
265d64e
adding another query
mdfleury-wbd Apr 18, 2024
9649d17
adding more variables
mdfleury-wbd Apr 24, 2024
b18abf3
working tests
mdfleury-wbd Apr 25, 2024
51ef79c
updating tests, graphql query
mdfleury-wbd Apr 30, 2024
256bd70
fixing linter
mdfleury-wbd May 1, 2024
66ac88d
fixing tests and linter
mdfleury-wbd May 1, 2024
76c6976
fixed tests, moved all queries to use vars
mdfleury-wbd May 9, 2024
301703a
Merge remote-tracking branch 'origin/main' into mdfleury-wbd/use-grap…
mdfleury-wbd May 9, 2024
48e08ac
removing unused package
mdfleury-wbd May 9, 2024
a4b0539
removing duplicated function
mdfleury-wbd May 9, 2024
1e53c6e
linting cleanup
mdfleury-wbd May 9, 2024
8b30c4f
fixing graphql query issue
mdfleury-wbd May 13, 2024
cf3b872
fixing test
mdfleury-wbd May 13, 2024
ba64d96
fixing tests
mdfleury-wbd May 13, 2024
15e940c
adding fix for non batch, and adding test
mdfleury-wbd May 13, 2024
befc66e
updating query name to match test
mdfleury-wbd May 13, 2024
a30135e
fixing gitlab
mdfleury-wbd May 14, 2024
5f02d23
Merge branch 'main' into mdfleury-wbd/use-graphql-vars
ZoogieZork May 14, 2024
c047f7a
fixing other tests
mdfleury-wbd May 14, 2024
deae458
fixing another test
mdfleury-wbd May 14, 2024
ff5120a
fixing another test
mdfleury-wbd May 14, 2024
b7e6620
PR improvements
mdfleury-wbd May 14, 2024
93b8113
fixing test, adding to readme
mdfleury-wbd May 14, 2024
374a91a
fixing misinfo in README
mdfleury-wbd May 14, 2024
9175b39
fixing regex
mdfleury-wbd May 14, 2024
957e0b7
adding missing code for readme
mdfleury-wbd May 14, 2024
37ccc3f
typo in readme
mdfleury-wbd May 14, 2024
0c0a283
adding setup.py
mdfleury-wbd May 15, 2024
abc1693
adding child deps
mdfleury-wbd May 15, 2024
a12e8db
trying again
mdfleury-wbd May 15, 2024
8008310
switching back to other version
mdfleury-wbd May 15, 2024
9f50deb
try to specify lambda platform
mdfleury-wbd May 16, 2024
5ea7061
try again
mdfleury-wbd May 16, 2024
55d7b39
Merge remote-tracking branch 'origin/main' into mdfleury-wbd/use-grap…
mdfleury-wbd May 16, 2024
20ff20d
adding another platform
mdfleury-wbd May 16, 2024
91f9ce7
again and again
mdfleury-wbd May 16, 2024
600ab07
specify py version
mdfleury-wbd May 16, 2024
cca82f4
removing extra print
mdfleury-wbd May 20, 2024
c15c370
Merge remote-tracking branch 'origin/main' into mdfleury-wbd/use-grap…
mdfleury-wbd May 20, 2024
8817b84
using correct requests
mdfleury-wbd May 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ dist/api/index.html: lambdas/api/spec.yaml
dist/lambdas/%.zip: $$(shell find lambdas/$$* -type f)
@echo "${INFO}Building $*"
mkdir -p ${BUILD_DIR}/lambdas/$*
${PIP} install lambdas/$* -t ${BUILD_DIR}/lambdas/$* --upgrade
${PIP} install lambdas/$* -t ${BUILD_DIR}/lambdas/$* --upgrade --only-binary=:all: ${LAMBDA_PLATFORM_FLAGS} --python-version ${LAMBDA_PYTHON_VER}
cp lambdas/$*/handlers.py ${BUILD_DIR}/lambdas/$*
mkdir -p ${DIST_DIR}/lambdas/$*
cd ${BUILD_DIR}/lambdas/$*; zip -r ${DIST_DIR}/lambdas/$*.zip *
Expand Down
1 change: 1 addition & 0 deletions backend/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pyjwt = "*"
cryptography = "*"
packaging = "==21.3"
urllib3 = "<2"
graphql-query = "*"
sqlparse = "==0.5.0"

[dev-packages]
Expand Down
192 changes: 188 additions & 4 deletions backend/Pipfile.lock

Large diffs are not rendered by default.

91 changes: 44 additions & 47 deletions backend/lambdas/api/repo/repo/github_util/github_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

import requests
from graphql_query import Argument, Field, Operation, Query, Variable

from artemislib.github.app import GithubApp
from repo.util.aws import AWSConnect
Expand All @@ -18,31 +18,35 @@ def process_github(
total_failed = []
grouped_reqs = _group_reqs(req_list)
for org in grouped_reqs:
authorization = _get_authorization(org, service_secret)
options_map = build_options_map(grouped_reqs[org])
query_list, query_map, unauthorized = _build_queries(grouped_reqs[org], service, identity.scope)
query, query_map, query_vars, unauthorized = _build_query(org, grouped_reqs[org], service, identity.scope)
queued, failed = _query(
query_list,
query,
query_map,
query_vars,
options_map,
service,
service_url,
service_secret,
authorization,
nat_connect=nat_connect,
identity=identity,
diff_url=diff_url,
org=org,
)
total_queued += queued
total_failed += failed

return PROCESS_RESPONSE_TUPLE(total_queued, total_failed, unauthorized)


def _build_queries(req_list, service, authz):
def _build_query(org, req_list, service, authz):
# Build up a GraphQL query for each repo in the request
unauthorized = []
query_list = []
query_map = {}
variables = {"org": org}
var_defs = {}
var_defs.update({"org": Variable(name="org", type="String!")})

count = 0
for req in req_list:
Expand All @@ -56,53 +60,50 @@ def _build_queries(req_list, service, authz):
unauthorized.append({"repo": f"{service}/{org_name}/{req['repo']}", "error": "Not Authorized"})
continue

repo_alias = f"repo{count}"
variables.update({repo_alias: req["repo"]})

var_defs.update({repo_alias: Variable(name=repo_alias, type="String!")})
query = Query(
name="repository",
alias=repo_alias,
arguments=[
Argument(name="owner", value=var_defs.get("org")),
Argument(name="name", value=var_defs.get(repo_alias)),
],
fields=["url", "nameWithOwner", "isPrivate", "diskUsage"],
)

if branch_name:
# Escape Double quotes in branch name. Leaving double quotes in will affect the graphql query
branch_name = branch_name.replace('"', '\\"')
query_list.append(
"""
repo%d: repository(owner: "%s", name: "%s") {
url
nameWithOwner
isPrivate
diskUsage
ref(qualifiedName: "%s") {name}
}
"""
% (count, org_name, req["repo"], branch_name)
)
else:
# If no branch was specified don't include ref in the query so
# we can distinguish between no branch and invalid branch in the
# query results.
query_list.append(
"""
repo%d: repository(owner: "%s", name: "%s") {
url
nameWithOwner
isPrivate
diskUsage
}
"""
% (count, org_name, req["repo"])
branch_alias = f"branch{count}"
var_defs.update({branch_alias: Variable(name=branch_alias, type="String!")})
query.fields.append(
Field(
name="ref",
arguments=[Argument(name="qualifiedName", value=var_defs.get(branch_alias))],
fields=["name"],
)
)
variables.update({f"branch{count}": branch_name})

query_map["repo%d" % count] = "%s/%s" % (org_name, req["repo"])
query_list.append(query)
query_map[repo_alias] = f"{org_name}/{req['repo']}"
count += 1

return query_list, query_map, unauthorized
operation = Operation(type="query", name="GetRepos", variables=var_defs.values(), queries=query_list)
return operation.render(), query_map, variables, unauthorized


def _get_query_response(authorization, service_url, query_list):
def _get_query_response(authorization, service_url, query, variables):
# Query the GitHub API
headers = {"Authorization": authorization, "Content-Type": "application/json"}
if REV_PROXY_DOMAIN_SUBSTRING and REV_PROXY_DOMAIN_SUBSTRING in service_url:
headers[REV_PROXY_SECRET_HEADER] = GetProxySecret()
log.error(service_url)

response = requests.post(
url=service_url,
headers=headers,
json={"query": "{%s}" % " ".join(query_list)},
json={"query": query, "variables": variables},
)

log.info("Got API response")
Expand All @@ -115,22 +116,18 @@ def _get_query_response(authorization, service_url, query_list):


def _query(
query_list, query_map, options_map, service, service_url, service_secret, nat_connect, identity, diff_url, org
query, query_map, query_vars, options_map, service, service_url, authorization, nat_connect, identity, diff_url
):
"""
todo: move queueing repos to aws.py
"""
aws_connect = AWSConnect()
queued = []
failed = []
if not query_list:
if not query:
return queued, failed

authorization = _get_authorization(org, service_secret)
log.info("Querying GitHub API for %d repos" % len(query_map))

log.info("Querying GitHub API for %d repos" % len(query_list))
resp = _get_query_response(authorization, service_url, query, query_vars)

resp = _get_query_response(authorization, service_url, query_list)
if resp is None:
log.info("Query was invalid, returning")
return None
Expand Down
10 changes: 6 additions & 4 deletions backend/lambdas/api/repo/repo/gitlab_util/process_gitlab.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from repo.gitlab_util.process_gitlab_utils import (
_build_queries,
_process_query_list,
build_queries,
process_query_list,
check_diff,
queue_gitlab_repository,
)
Expand All @@ -12,9 +12,10 @@

def process_gitlab(req_list, service, service_url, service_secret, batch_queries, nat_connect, identity, diff_url):
options_map = build_options_map(req_list)
query_list, query_map, unauthorized = _build_queries(req_list, identity.scope, service)
query_list, variables, query_map, unauthorized = build_queries(req_list, identity.scope, service, batch_queries)
queued, failed = _query(
query_list,
variables,
query_map,
options_map,
service,
Expand All @@ -31,6 +32,7 @@ def process_gitlab(req_list, service, service_url, service_secret, batch_queries

def _query(
query_list,
variables,
query_map,
options_map,
service,
Expand All @@ -50,7 +52,7 @@ def _query(

log.info(f"Querying {service} API for {len(query_list)} repos")

resp = _process_query_list(key, service_url, query_list, batch_queries)
resp = process_query_list(key, service_url, query_list, variables, batch_queries)

log.info("Queuing repos")

Expand Down
81 changes: 54 additions & 27 deletions backend/lambdas/api/repo/repo/gitlab_util/process_gitlab_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,39 @@
import json
import re
import urllib
from string import Template

import requests
from graphql_query import Argument, Field, Operation, Query, Variable

from repo.util.aws import AWSConnect
from repo.util.const import GITLAB_QUERY_NO_BRANCH, GITLAB_QUERY_WITH_BRANCH
from repo.util.env import DEFAULT_ORG, REV_PROXY_DOMAIN_SUBSTRING, REV_PROXY_SECRET_HEADER
from repo.util.utils import GetProxySecret, Logger, auth

log = Logger(__name__)


def process_query_list(key, service_url, query_list, vars, batch_query=True):
    """Send the rendered GraphQL operation(s) and collect the results by alias.

    Args:
        key: API token, passed through to ``_get_query_response``.
        service_url: URL of the GraphQL endpoint.
        query_list: Rendered GraphQL operation strings. A bare string is
            accepted as well and treated as a one-element list.
        vars: Mapping of GraphQL variable names to values; it is sent with
            every request. (The name shadows the builtin but is kept so
            keyword callers keep working.)
        batch_query: When true, everything is sent in a single request.
            Otherwise each operation is sent on its own and the per-request
            results are merged.

    Returns:
        ``{"data": {}}`` when there is nothing to send. In batch mode, whatever
        ``_get_query_response`` returns for the single request. Otherwise a
        dict of the form ``{"data": {alias: result, ...}}`` built from the
        responses that carried data.
    """
    if isinstance(query_list, str):
        query_list = [query_list]
    if not query_list:
        return {"data": {}}

    if batch_query:
        # The "query" member of a GraphQL request is a single string, so the
        # rendered operation is unwrapped from its list before it is sent.
        # NOTE(review): batch callers are expected to pass exactly one
        # operation; confirm against build_queries.
        return _get_query_response(key, service_url, "\n".join(query_list), vars)

    response_dict = {"data": {}}
    for query_item in query_list:
        resp = _get_query_response(key, service_url, query_item, vars)
        data = resp.get("data") if isinstance(resp, dict) else None
        if not data:
            # Covers a missing response, a missing "data" key and "data": null.
            log.error("Repo query failed to receive valid output: %s", query_item)
            continue
        # The response is already keyed by the alias used in the operation
        # (e.g. "repo0"), so merge it directly. Parsing the alias back out of
        # the query text by looking for the first "$repoN:" picks up a variable
        # declaration, which is the wrong alias whenever an operation declares
        # more variables than the one it queries.
        response_dict["data"].update(data)

    return response_dict


def _get_query_response(key, service_url, query):
def _get_query_response(key, service_url, query, vars):
headers = {"Authorization": "Bearer %s" % key, "Content-Type": "application/json"}
if REV_PROXY_DOMAIN_SUBSTRING and REV_PROXY_DOMAIN_SUBSTRING in service_url:
headers[REV_PROXY_SECRET_HEADER] = GetProxySecret()
response = requests.post(url=service_url, headers=headers, json={"query": query})
response = requests.post(url=service_url, headers=headers, json={"query": query, "variables": vars})
log.info("Got API response")
if response.status_code != 200:
log.error("Error retrieving query: %s", response.text)
Expand All @@ -59,39 +57,68 @@ def check_diff(diff_url, key, org_repo, base, compare):
return r.status_code == 200


def build_queries(req_list, authz, service, batch_queries):
    """Build parameterised GraphQL operations for the requested repos.

    Repo paths and branch names are passed as GraphQL variables instead of
    being interpolated into the query text.

    Args:
        req_list: Request dicts. Each is read for ``"repo"`` (required),
            ``"org"`` (defaults to ``DEFAULT_ORG``) and ``"branch"`` (optional).
        authz: Scope passed to ``auth`` to decide whether a repo may be scanned.
        service: Service name, used for the auth check and in the
            "Not Authorized" entries.
        batch_queries: When true, one operation containing every repo is
            rendered; otherwise one operation per repo.

    Returns:
        A 4-tuple ``(queries, variables, query_map, unauthorized)``:
        ``queries`` is a list of rendered operation strings (empty when no repo
        was authorized), ``variables`` maps variable names to values,
        ``query_map`` maps each alias (``repoN``) to
        ``{"repo": "<org>/<repo>", "branch": <branch or None>}``, and
        ``unauthorized`` lists the rejected repos.
    """
    unauthorized = []
    query_map = {}
    variables = {}
    # One (Query, [Variable, ...]) pair per authorized repo, so each operation
    # can declare exactly the variables it uses.
    entries = []

    for req in req_list:
        branch_name = req.get("branch")
        org_name = req.get("org", DEFAULT_ORG)
        repo_id = f"{org_name}/{req['repo']}"

        # Validate that this API key is authorized to scan this repo
        if not auth(repo_id, service, authz):
            unauthorized.append({"repo": f"{service}/{repo_id}", "error": "Not Authorized"})
            continue

        # Aliases are numbered over authorized repos only.
        count = len(entries)
        repo_alias = f"repo{count}"
        repo_var = Variable(name=repo_alias, type="ID!")
        used_vars = [repo_var]
        variables[repo_alias] = repo_id

        query = Query(
            name="project",
            alias=repo_alias,
            arguments=[Argument(name="fullPath", value=repo_var)],
            fields=["httpUrlToRepo", "fullPath", "visibility", Field(name="statistics", fields=["repositorySize"])],
        )

        # If no branch was specified the ref lookup is left out, so the results
        # can distinguish "no branch requested" from "invalid branch".
        if branch_name:
            branch_alias = f"branch{count}"
            branch_var = Variable(name=branch_alias, type="String!")
            used_vars.append(branch_var)
            variables[branch_alias] = branch_name
            query.fields.append(
                Field(
                    name="repository",
                    fields=[
                        Field(
                            name="tree",
                            arguments=[Argument(name="ref", value=branch_var)],
                            fields=[Field(name="lastCommit", fields=["id"])],
                        )
                    ],
                )
            )

        entries.append((query, used_vars))
        query_map[repo_alias] = {"repo": repo_id, "branch": branch_name}

    if not entries:
        # Nothing authorized: do not render an operation with an empty
        # selection set, which is not a valid GraphQL document.
        return [], variables, query_map, unauthorized

    if batch_queries:
        operations = [
            Operation(
                type="query",
                name="GetRepos",
                variables=[var for _, used in entries for var in used],
                queries=[query for query, _ in entries],
            )
        ]
    else:
        # Declaring a variable that the operation never uses fails GraphQL
        # validation, so each single-repo operation declares only its own.
        operations = [
            Operation(type="query", name="GetRepos", variables=used, queries=[query]) for query, used in entries
        ]

    return [operation.render() for operation in operations], variables, query_map, unauthorized


def queue_gitlab_repository(
Expand Down
30 changes: 1 addition & 29 deletions backend/lambdas/api/repo/repo/util/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Constants file
todo : search plugin settings.json to obtain a dynamic list of plugins and categories.
"""

# pylint: disable=no-member
import os
from collections import namedtuple
Expand Down Expand Up @@ -186,35 +187,6 @@
"exclude_paths",
]

GITLAB_QUERY_WITH_BRANCH = """
repo$count{ project(fullPath: "$org_name/$repo") {
httpUrlToRepo,
fullPath,
visibility,
statistics {
repositorySize
}
repository {
tree(ref: "$branch") {
lastCommit {
id
}
}
}
}}
"""

GITLAB_QUERY_NO_BRANCH = """
repo$count{ project(fullPath: "$org_name/$repo") {
httpUrlToRepo,
fullPath,
visibility,
statistics {
repositorySize
}
}}
"""

BITBUCKET_PUBLIC_REPO_QUERY = "$service_url/repositories/$org/$repo"

BITBUCKET_PRIVATE_REPO_QUERY = "$service_url/projects/$org/repos/$repo"
Expand Down
Loading
Loading