
Commit 28c2e53

Merge pull request #157 from blockchain-etl/upgrade-composer-version
Upgrade composer version
2 parents 11d33e2 + c222580 commit 28c2e53

28 files changed: 89 additions, 86 deletions

airflow/README.md

3 additions, 3 deletions

@@ -39,7 +39,7 @@ Airflow DAGs for exporting and loading the Polygon blockchain data to Google Big
 gcloud composer environments create \
     ${ENVIRONMENT_NAME} \
     --location=us-central1 \
-    --image-version=composer-2.0.28-airflow-2.2.5 \
+    --image-version=composer-2.1.14-airflow-2.5.1 \
    --environment-size=small \
    --scheduler-cpu=2 \
    --scheduler-memory=4 \
@@ -96,13 +96,13 @@ Note that the variable names must be prefixed with `{chain}_`, e.g. `polygon_out
 | `output_bucket` | GCS bucket where exported files with blockchain data will be stored |
 | `export_start_date` | export start date, default: `2019-04-22` |
 | `export_end_date` | export end date, used for integration testing, default: None |
-| `export_schedule_interval` | export cron schedule, default: `0 1 * * *` |
+| `export_schedule` | export cron schedule, default: `0 1 * * *` |
 | `provider_uris` | comma-separated list of provider URIs for [polygon-etl](https://polygon-etl.readthedocs.io/en/latest/commands) command |
 | `notification_emails` | comma-separated list of emails where notifications on DAG failures, retries and successes will be delivered. This variable must not be prefixed with `{chain}_` |
 | `export_max_active_runs` | max active DAG runs for export, default: `3` |
 | `export_max_workers` | max workers for [polygon-etl](https://polygon-etl.readthedocs.io/en/latest/commands) command, default: `5` |
 | `destination_dataset_project_id` | GCS project id where destination BigQuery dataset is |
-| `load_schedule_interval` | load cron schedule, default: `0 2 * * *` |
+| `load_schedule` | load cron schedule, default: `0 2 * * *` |
 | `load_end_date` | load end date, used for integration testing, default: None |
 
 ### Creating a Cloud Source Repository for Configuration Files
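
Because the DAGs read these variables with the `{chain}_` prefix, the Airflow variable to set after this rename is `polygon_export_schedule` rather than `polygon_export_schedule_interval`. A minimal sketch of the lookup using Airflow's standard `Variable` API; the fallback value below is illustrative, not taken from the repo:

# Sketch only: resolving the renamed, prefixed schedule variable.
from airflow.models import Variable

# With var_prefix='polygon_', the export DAG resolves 'polygon_export_schedule'.
# default_var is an assumed fallback for this example, not necessarily the project's default.
export_schedule = Variable.get("polygon_export_schedule", default_var="0 1 * * *")
print(export_schedule)  # e.g. '0 1 * * *'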

airflow/dags/polygon_export_dag.py

1 addition, 1 deletion

@@ -8,7 +8,7 @@
     dag_id='polygon_export_dag',
     **read_export_dag_vars(
         var_prefix='polygon_',
-        export_schedule_interval='0 2 * * *',
+        export_schedule='0 2 * * *',
         export_start_date='2020-05-30',
         export_max_active_runs=3,
         export_max_active_tasks=12,

airflow/dags/polygon_load_dag.py

1 addition, 1 deletion

@@ -14,6 +14,6 @@
     chain='polygon',
     **read_load_dag_vars(
         var_prefix='polygon_',
-        load_schedule_interval='0 7 * * *'
+        load_schedule='0 7 * * *'
     )
 )

airflow/dags/polygon_parse_dag.py

1 addition, 1 deletion

@@ -17,7 +17,7 @@
 
     parse_dag_vars = read_parse_dag_vars(
         var_prefix=var_prefix,
-        parse_schedule_interval='30 8 * * *'
+        parse_schedule='30 8 * * *'
     )
 
     for folder in glob(table_definitions_folder):

airflow/dags/polygon_partition_dag.py

1 addition, 1 deletion

@@ -16,6 +16,6 @@
     public_dataset_name = 'crypto_polygon',
     **read_partition_dag_vars(
         var_prefix="polygon_",
-        partition_schedule_interval="0 8 * * *",
+        partition_schedule="0 8 * * *",
     ),
 )

airflow/dags/polygonetl_airflow/build_export_dag.py

18 additions, 18 deletions

@@ -7,7 +7,7 @@
 from tempfile import TemporaryDirectory
 
 from airflow import DAG, configuration
-from airflow.operators.dummy import DummyOperator
+from airflow.operators.empty import EmptyOperator
 from airflow.operators.python import PythonOperator
 
 from polygonetl.cli import (
@@ -34,21 +34,21 @@
 
 
 def build_export_dag(
-        dag_id,
-        provider_uris,
-        provider_uris_archival,
-        output_bucket,
-        export_start_date,
-        export_end_date=None,
-        notification_emails=None,
-        export_schedule_interval='0 0 * * *',
-        export_max_workers=10,
-        export_traces_max_workers=10,
-        export_batch_size=200,
-        export_max_active_runs=None,
-        export_max_active_tasks=None,
-        export_retries=5,
-        **kwargs
+        dag_id,
+        provider_uris,
+        provider_uris_archival,
+        output_bucket,
+        export_start_date,
+        export_end_date=None,
+        notification_emails=None,
+        export_schedule='0 0 * * *',
+        export_max_workers=10,
+        export_traces_max_workers=10,
+        export_batch_size=200,
+        export_max_active_runs=None,
+        export_max_active_tasks=None,
+        export_retries=5,
+        **kwargs
 ):
     default_dag_args = {
         "depends_on_past": False,
@@ -82,7 +82,7 @@ def build_export_dag(
 
     dag = DAG(
         dag_id,
-        schedule_interval=export_schedule_interval,
+        schedule=export_schedule,
         default_args=default_dag_args,
         max_active_runs=export_max_active_runs,
         max_active_tasks=export_max_active_tasks,
@@ -345,7 +345,7 @@ def add_export_task(
        return None
 
    # Operators
-   export_complete = DummyOperator(task_id="export_complete", dag=dag)
+   export_complete = EmptyOperator(task_id="export_complete", dag=dag)
 
    export_blocks_and_transactions_operator = add_export_task(
        export_blocks_and_transactions_toggle,
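
Background for the renames in this file: Airflow 2.4+ accepts `schedule` in place of the deprecated `schedule_interval` argument on `DAG`, and `EmptyOperator` (from `airflow.operators.empty`) supersedes `DummyOperator`. A minimal sketch of the new-style usage, with an illustrative DAG id and schedule rather than this repo's values:

# Sketch of Airflow 2.5-era DAG construction mirroring the renamed arguments.
from datetime import datetime

from airflow import DAG
from airflow.operators.empty import EmptyOperator

with DAG(
    dag_id="example_export_dag",   # illustrative id, not the repo's
    schedule="0 0 * * *",          # Airflow 2.4+ spelling of schedule_interval
    start_date=datetime(2020, 5, 30),
    catchup=False,
) as dag:
    export_complete = EmptyOperator(task_id="export_complete")  # drop-in for DummyOperator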

airflow/dags/polygonetl_airflow/build_load_dag.py

2 additions, 2 deletions

@@ -35,7 +35,7 @@ def build_load_dag(
         load_start_date=datetime(2018, 7, 1),
         load_end_date=None,
         load_catchup=False,
-        load_schedule_interval='0 0 * * *',
+        load_schedule='0 0 * * *',
         load_all_partitions=True
 ):
     # The following datasets must be created in BigQuery:
@@ -93,7 +93,7 @@ def read_file(filepath):
     dag = models.DAG(
         dag_id,
         catchup=load_catchup,
-        schedule_interval=load_schedule_interval,
+        schedule=load_schedule,
         default_args=default_dag_args)
 
     dags_folder = os.environ.get('DAGS_FOLDER', '/home/airflow/gcs/dags')

airflow/dags/polygonetl_airflow/build_parse_dag.py

2 additions, 2 deletions

@@ -33,7 +33,7 @@ def build_parse_dag(
         internal_project_id,
         notification_emails=None,
         parse_start_date=datetime(2020, 5, 30),
-        parse_schedule_interval='0 0 * * *',
+        parse_schedule='0 0 * * *',
         parse_all_partitions=None,
 ):
 
@@ -57,7 +57,7 @@ def build_parse_dag(
     dag = models.DAG(
         dag_id,
         catchup=False,
-        schedule_interval=parse_schedule_interval,
+        schedule=parse_schedule,
         default_args=default_dag_args)
 
     def create_parse_task():

airflow/dags/polygonetl_airflow/build_partition_dag.py

2 additions, 2 deletions

@@ -23,7 +23,7 @@ def build_partition_dag(
         public_dataset_name,
         load_dag_id,
         partition_start_date=datetime(2015, 7, 30),
-        partition_schedule_interval='0 0 * * *',
+        partition_schedule='0 0 * * *',
         notification_emails=None,
 ):
 
@@ -44,7 +44,7 @@ def build_partition_dag(
     dag = models.DAG(
         dag_id,
         catchup=False,
-        schedule_interval=partition_schedule_interval,
+        schedule=partition_schedule,
         default_args=default_dag_args)
 
     def add_partition_tasks(task, sql_template, dependencies=None):

airflow/dags/polygonetl_airflow/build_verify_streaming_dag.py

2 additions, 2 deletions

@@ -19,7 +19,7 @@ def build_verify_streaming_dag(
         chain='polygon',
         notification_emails=None,
         start_date=datetime(2018, 7, 1),
-        schedule_interval='*/10 * * * *',
+        schedule='*/10 * * * *',
         max_lag_in_minutes=15):
     dataset_name = 'crypto_{}'.format(chain)
 
@@ -46,7 +46,7 @@ def build_verify_streaming_dag(
     dag = DAG(
         dag_id,
         catchup=False,
-        schedule_interval=schedule_interval,
+        schedule=schedule,
         default_args=default_dag_args)
 
     dags_folder = os.environ.get('DAGS_FOLDER', '/home/airflow/gcs/dags')

airflow/dags/polygonetl_airflow/variables.py

5 additions, 5 deletions

@@ -27,7 +27,7 @@ def read_export_dag_vars(var_prefix, **kwargs):
         'output_bucket': read_var('output_bucket', var_prefix, True, **kwargs),
         'export_start_date': export_start_date,
         'export_end_date': export_end_date,
-        'export_schedule_interval': read_var('export_schedule_interval', var_prefix, True, **kwargs),
+        'export_schedule': read_var('export_schedule', var_prefix, True, **kwargs),
         'provider_uris': provider_uris,
         'provider_uris_archival': provider_uris_archival,
         'notification_emails': read_var('notification_emails', None, False, **kwargs),
@@ -52,7 +52,7 @@ def read_load_dag_vars(var_prefix, **kwargs):
         'destination_dataset_project_id': read_var('destination_dataset_project_id', var_prefix, True, **kwargs),
         'notification_emails': read_var('notification_emails', None, False, **kwargs),
         # 'success_notification_emails': read_var('success_notification_emails', None, False, **kwargs),
-        'load_schedule_interval': read_var('load_schedule_interval', var_prefix, True, **kwargs),
+        'load_schedule': read_var('load_schedule', var_prefix, True, **kwargs),
         'load_all_partitions': parse_bool(read_var('load_all_partitions', var_prefix, False, **kwargs), default=None),
         'load_catchup': parse_bool(read_var('load_catchup', var_prefix, False, **kwargs), default=False),
     }
@@ -79,8 +79,8 @@ def read_partition_dag_vars(var_prefix, **kwargs):
         "partitioned_project_id": read_var(
             "partitioned_project_id", var_prefix, True, **kwargs
         ),
-        "partition_schedule_interval": read_var(
-            "partition_schedule_interval", var_prefix, False, **kwargs
+        "partition_schedule": read_var(
+            "partition_schedule", var_prefix, False, **kwargs
         ),
         "notification_emails": read_var("notification_emails", None, False, **kwargs),
     }
@@ -100,7 +100,7 @@ def read_parse_dag_vars(var_prefix, **kwargs):
         # internal_project_id takes its value from partitioned_project_id
         'internal_project_id': read_var('partitioned_project_id', var_prefix, True, **kwargs),
         'parse_destination_dataset_project_id': read_var('parse_destination_dataset_project_id', var_prefix, True, **kwargs),
-        'parse_schedule_interval': read_var('parse_schedule_interval', var_prefix, True, **kwargs),
+        'parse_schedule': read_var('parse_schedule', var_prefix, True, **kwargs),
         'parse_all_partitions': parse_bool(read_var('parse_all_partitions', var_prefix, False), default=None),
         'notification_emails': read_var('notification_emails', None, False, **kwargs),
     }

airflow/requirements_airflow.txt

4 additions, 4 deletions

@@ -2,7 +2,7 @@
 # During local dev & testing, you can `pip install -e ../cli` first
 # pip will then prioritise the local polygon-etl package over pypi
 
-discord-webhook==0.14.0
-eth-rlp==0.2.1 # Fixes install conflicts issue in Composer
-eth-utils==1.8.4 # Fixes install conflicts issue in Composer
-polygon-etl==0.3.5
+discord-webhook==1.1.0
+eth-hash==0.3.3 # Fixes install conflicts issue in Composer
+polygon-etl==0.3.6
+web3==5.31.0 # Fixes install conflicts issue in Composer

airflow/requirements_local.txt

6 additions, 5 deletions

@@ -1,5 +1,6 @@
-apache-airflow[google]==2.2.5 # similar to `composer-2.0.28-airflow-2.2.5`
-Flask==1.1.2 # matches `composer-2.0.28-airflow-2.2.5`
-google-api-core==2.8.2 # matches `composer-2.0.28-airflow-2.2.5`
-google-cloud-bigquery==2.34.4 # matches `composer-2.0.28-airflow-2.2.5`
-google-cloud-storage==1.44.0 # matches `composer-2.0.28-airflow-2.2.5`
+apache-airflow[gcp]==2.5.1 # similar to `composer-2.1.14-airflow-2.5.1`
+Flask==2.2.2 # matches `composer-2.1.14-airflow-2.5.1`
+google-api-core==2.8.1 # matches `composer-2.1.14-airflow-2.5.1`
+google-cloud-bigquery==2.34.4 # matches `composer-2.1.14-airflow-2.5.1`
+google-cloud-storage==2.7.0 # matches `composer-2.1.14-airflow-2.5.1`
+protobuf==3.20.0 # matches `composer-2.1.14-airflow-2.5.1`

airflow/test_dags/dummy_dag.py

1 addition, 1 deletion

@@ -17,7 +17,7 @@ def do_something():
 
 with models.DAG(
     "dummy_dag",
-    schedule_interval=timedelta(days=1),
+    schedule=timedelta(days=1),
     start_date=datetime(2021, 11, 1),
     catchup=False,
     default_args={'on_failure_callback': handle_dag_failure},

cli/polygonetl/executors/batch_work_executor.py

1 addition, 1 deletion

@@ -25,7 +25,7 @@
 from json.decoder import JSONDecodeError
 
 from requests.exceptions import Timeout as RequestsTimeout, HTTPError, TooManyRedirects, ConnectionError as RequestsConnectionError
-from web3.utils.threads import Timeout as Web3Timeout
+from web3._utils.threads import Timeout as Web3Timeout
 
 from polygonetl.executors.bounded_executor import BoundedExecutor
 from polygonetl.executors.fail_safe_executor import FailSafeExecutor

cli/polygonetl/jobs/export_token_transfers_job.py

1 addition, 1 deletion

@@ -82,7 +82,7 @@ def _export_batch(self, block_number_batch):
             if token_transfer is not None:
                 self.item_exporter.export_item(self.token_transfer_mapper.token_transfer_to_dict(token_transfer))
 
-        self.web3.eth.uninstallFilter(event_filter.filter_id)
+        self.web3.eth.uninstall_filter(event_filter.filter_id)
 
     def _end(self):
         self.batch_work_executor.shutdown()
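
This rename follows web3.py v5, where the camelCase `uninstallFilter` is replaced by the snake_case `uninstall_filter`. A small standalone sketch of the v5 filter API; the provider URI and block range are placeholders:

# Sketch of the web3.py v5 snake_case filter lifecycle.
from web3 import Web3, HTTPProvider

w3 = Web3(HTTPProvider("https://polygon-rpc.example"))  # placeholder provider URI
event_filter = w3.eth.filter({"fromBlock": 1_000_000, "toBlock": 1_000_010})
logs = event_filter.get_all_entries()
w3.eth.uninstall_filter(event_filter.filter_id)  # snake_case spelling in web3 v5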

cli/polygonetl/jobs/export_traces_job.py

1 addition, 1 deletion

@@ -87,7 +87,7 @@ def _export_batch(self, block_number_batch):
 
             # TODO: Change to traceFilter when this issue is fixed
             # https://github.com/paritytech/parity-ethereum/issues/9822
-            json_traces = self.web3.parity.traceBlock(block_number)
+            json_traces = self.web3.parity.trace_block(block_number)
 
             if json_traces is None:
                 raise ValueError(
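
Same pattern as above: web3.py v5 exposes the Parity/OpenEthereum trace API under snake_case names. A sketch, assuming a node that serves the `trace_*` RPC methods; the URI and block number are placeholders:

# Sketch of the snake_case Parity trace call in web3.py v5.
from web3 import Web3, HTTPProvider

w3 = Web3(HTTPProvider("https://archive-node.example"))  # placeholder archival provider URI
traces = w3.parity.trace_block(30_000_000)  # replaces the camelCase traceBlock call
print(len(traces))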

cli/polygonetl/providers/ipc.py

1 addition, 3 deletions

@@ -24,10 +24,8 @@
 import json
 import socket
 
+from web3._utils.threads import Timeout
 from web3.providers.ipc import IPCProvider
-from web3.utils.threads import (
-    Timeout,
-)
 
 try:
     from json import JSONDecodeError

cli/polygonetl/providers/request.py

1 addition, 4 deletions

@@ -3,10 +3,7 @@
 import lru
 import requests
 from requests.adapters import HTTPAdapter
-
-from web3.utils.caching import (
-    generate_cache_key,
-)
+from web3._utils.caching import generate_cache_key
 
 
 def _remove_session(key, session):

cli/polygonetl/service/eth_service.py

3 additions, 2 deletions

@@ -26,9 +26,10 @@
 from polygonetl.service.graph_operations import GraphOperations, OutOfBoundsError, Point
 from web3.middleware import geth_poa_middleware
 
+
 class EthService(object):
     def __init__(self, web3):
-        web3.middleware_stack.inject(geth_poa_middleware, layer=0)
+        web3.middleware_onion.inject(geth_poa_middleware, layer=0)
         graph = BlockTimestampGraph(web3)
         self._graph_operations = GraphOperations(graph)
 
@@ -72,7 +73,7 @@ def __init__(self, web3):
 
     def get_first_point(self):
         # Ignore the genesis block as its timestamp is 0
-        return block_to_point(self._web3.eth.getBlock(1))
+        return block_to_point(self._web3.eth.get_block(1))
 
     def get_last_point(self):
         return block_to_point(self._web3.eth.getBlock('latest'))
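
Both changes here track web3.py v5's API: `middleware_onion` replaces `middleware_stack`, and `eth.get_block` replaces `eth.getBlock`. A self-contained sketch of the same PoA-aware setup; the provider URI is a placeholder:

# Sketch of the web3.py v5 PoA configuration used across this repo.
from web3 import Web3, HTTPProvider
from web3.middleware import geth_poa_middleware

w3 = Web3(HTTPProvider("https://polygon-rpc.example"))    # placeholder provider URI
w3.middleware_onion.inject(geth_poa_middleware, layer=0)  # Polygon blocks need the PoA middleware
block = w3.eth.get_block(1)
print(block["timestamp"])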

cli/polygonetl/service/eth_token_service.py

2 additions, 2 deletions

@@ -21,7 +21,7 @@
 # SOFTWARE.
 import logging
 
-from web3.exceptions import BadFunctionCallOutput
+from web3.exceptions import BadFunctionCallOutput, ContractLogicError
 
 from polygonetl.domain.token import EthToken
 from polygonetl.erc20_abi import ERC20_ABI
@@ -65,7 +65,7 @@ def _call_contract_function(self, func):
         # OverflowError exception happens if the return type of the function doesn't match the expected type
         result = call_contract_function(
             func=func,
-            ignore_errors=(BadFunctionCallOutput, OverflowError, ValueError),
+            ignore_errors=(BadFunctionCallOutput, ContractLogicError, OverflowError, ValueError),
             default_value=None)
 
         if self._function_call_result_transformer is not None:
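
Adding `ContractLogicError` to the ignored errors matches web3.py v5 behaviour, where a reverting `eth_call` is reported as `ContractLogicError`. A simplified sketch of the equivalent handling; the token address and provider URI are placeholders:

# Sketch of tolerating reverted ERC-20 metadata calls, as the token service does.
from web3 import Web3, HTTPProvider
from web3.exceptions import BadFunctionCallOutput, ContractLogicError

from polygonetl.erc20_abi import ERC20_ABI

w3 = Web3(HTTPProvider("https://polygon-rpc.example"))  # placeholder provider URI
token = w3.eth.contract(
    address="0x0000000000000000000000000000000000000000",  # placeholder token address
    abi=ERC20_ABI,
)

try:
    symbol = token.functions.symbol().call()
except (BadFunctionCallOutput, ContractLogicError, OverflowError, ValueError):
    symbol = None  # mirrors default_value=None in _call_contract_function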

cli/polygonetl/streaming/eth_streamer_adapter.py

2 additions, 2 deletions

@@ -34,13 +34,13 @@ def __init__(
         self.item_id_calculator = EthItemIdCalculator()
         self.item_timestamp_calculator = EthItemTimestampCalculator()
         self.web3 = Web3(self.batch_web3_provider)
-        self.web3.middleware_stack.inject(geth_poa_middleware, layer=0)
+        self.web3.middleware_onion.inject(geth_poa_middleware, layer=0)
 
     def open(self):
         self.item_exporter.open()
 
     def get_current_block_number(self):
-        return int(self.web3.eth.getBlock("latest").number)
+        return int(self.web3.eth.get_block("latest").number)
 
     def export_all(self, start_block, end_block):
         # Export blocks and transactions

cli/polygonetl/web3_utils.py

1 addition, 1 deletion

@@ -26,5 +26,5 @@
 
 def build_web3(provider):
     w3 = Web3(provider)
-    w3.middleware_stack.inject(geth_poa_middleware, layer=0)
+    w3.middleware_onion.inject(geth_poa_middleware, layer=0)
     return w3
