Skip to content

Commit dec3ed2

Browse files
authored
feat: add daily GA4 data fetching workflow (#355)
Signed-off-by: Luca Muscariello <muscariello@ieee.org>
1 parent 2c39f50 commit dec3ed2

File tree

3 files changed

+141
-1
lines changed

3 files changed

+141
-1
lines changed

.github/scripts/fetch_ga4.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import os
2+
import csv
3+
import json
4+
from google.analytics.data_v1beta import BetaAnalyticsDataClient
5+
from google.analytics.data_v1beta.types import (
6+
DateRange,
7+
Dimension,
8+
Metric,
9+
RunReportRequest,
10+
)
11+
from google.oauth2 import service_account
12+
13+
def fetch_analytics_data():
    """Fetch yesterday's GA4 metrics and append them to ``data/ga4_stats.csv``.

    The GA4 property id and the service-account key (as a JSON string) are
    read from the ``GA4_PROPERTY_ID`` and ``GA4_SERVICE_ACCOUNT_JSON``
    environment variables. A report is requested with the ``date`` dimension
    and the ``activeUsers``, ``sessions``, ``screenPageViews`` and
    ``eventCount`` metrics for the ``yesterday`` date range. One CSV row is
    appended per returned date that is not already present in the file; the
    header row is written when the file is missing or empty.

    Raises:
        ValueError: If either environment variable is unset or empty, or if
            the service-account value is not valid JSON.
        SystemExit: With status 1 if the report request fails.
    """
    # Local import keeps this function self-contained; ``sys.exit`` replaces
    # the ``site``-provided ``exit`` helper, which is intended for the
    # interactive shell and is not guaranteed to exist in every runtime.
    import sys

    property_id = os.environ.get("GA4_PROPERTY_ID")
    service_account_json = os.environ.get("GA4_SERVICE_ACCOUNT_JSON")

    if not property_id:
        raise ValueError("Environment variable GA4_PROPERTY_ID must be set.")

    if not service_account_json:
        raise ValueError("Environment variable GA4_SERVICE_ACCOUNT_JSON must be set.")

    # Authenticate using the JSON string directly. Only the parse step is
    # guarded so unrelated credential/client errors are not mislabelled.
    try:
        info = json.loads(service_account_json)
    except json.JSONDecodeError as err:
        raise ValueError("GA4_SERVICE_ACCOUNT_JSON is not valid JSON.") from err

    credentials = service_account.Credentials.from_service_account_info(info)
    client = BetaAnalyticsDataClient(credentials=credentials)

    print(f"Fetching data for property: {property_id}")

    # Single source of truth for both the requested metrics and the CSV
    # header, so column order cannot drift from request order.
    metric_names = ["activeUsers", "sessions", "screenPageViews", "eventCount"]

    # Define request - getting data for yesterday
    request = RunReportRequest(
        property=f"properties/{property_id}",
        dimensions=[Dimension(name="date")],
        metrics=[Metric(name=name) for name in metric_names],
        date_ranges=[DateRange(start_date="yesterday", end_date="yesterday")],
    )

    try:
        response = client.run_report(request)
    except Exception as e:  # top-level boundary: report and exit non-zero
        print(f"Error fetching report: {e}", file=sys.stderr)
        sys.exit(1)

    output_dir = "data"
    os.makedirs(output_dir, exist_ok=True)
    csv_file = os.path.join(output_dir, "ga4_stats.csv")

    # Collect the dates already stored so re-runs do not append duplicates.
    # A missing header line (absent or empty file) means we must write one.
    existing_dates = set()
    needs_header = True
    if os.path.isfile(csv_file):
        with open(csv_file, mode="r", newline="", encoding="utf-8") as existing:
            reader = csv.reader(existing)
            needs_header = next(reader, None) is None
            existing_dates.update(row[0] for row in reader if row)

    with open(csv_file, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)

        if needs_header:
            writer.writerow(["date", *metric_names])

        for row in response.rows:
            # The slicing assumes an 8-character YYYYMMDD value from the API
            # (TODO confirm); it is rewritten as YYYY-MM-DD for the CSV.
            date_str = row.dimension_values[0].value
            formatted_date = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}"

            if formatted_date in existing_dates:
                print(f"Skipping {formatted_date} - already exists in file.")
                continue

            # Metric values are taken in the order they appear in the response
            # row, which is expected to match the request order above.
            writer.writerow(
                [formatted_date, *(metric.value for metric in row.metric_values)]
            )
            # Remember the date so a repeat within this response is skipped.
            existing_dates.add(formatted_date)
            print(f"Appended data for {formatted_date}")

    if not response.rows:
        print("No data found for yesterday.")
101+
102+
# Script entry point: run the fetch only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    fetch_analytics_data()

.github/scripts/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
21
requests
32
playwright
43
beautifulsoup4
4+
google-analytics-data

.github/workflows/ga4-daily.yml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Daily workflow: runs the GA4 fetch script and uploads the resulting CSV as a
# build artifact.
name: Publish GA4 Data
on:
  schedule:
    - cron: '0 8 * * *'  # Run at 8:00 AM UTC daily (adjust as needed for timezone)
  workflow_dispatch:  # Allow manual trigger

# Least privilege: the steps below only check out the repository and upload an
# artifact; nothing is committed or pushed, so read access is sufficient.
# Raise this to `contents: write` only if a commit/push step is added.
permissions:
  contents: read

jobs:
  fetch-and-publish:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r .github/scripts/requirements.txt

      # Credentials are injected from repository secrets as environment
      # variables read by the script.
      - name: Fetch GA4 Data
        env:
          GA4_PROPERTY_ID: ${{ secrets.GA4_PROPERTY_ID }}
          GA4_SERVICE_ACCOUNT_JSON: ${{ secrets.GA4_SERVICE_ACCOUNT_JSON }}
        run: python .github/scripts/fetch_ga4.py

      - name: Upload GA4 statistics artifact
        uses: actions/upload-artifact@v4
        with:
          name: ga4-daily-stats
          path: data/ga4_stats.csv

0 commit comments

Comments
 (0)