|
| 1 | +import os |
| 2 | +import csv |
| 3 | +import json |
| 4 | +from google.analytics.data_v1beta import BetaAnalyticsDataClient |
| 5 | +from google.analytics.data_v1beta.types import ( |
| 6 | + DateRange, |
| 7 | + Dimension, |
| 8 | + Metric, |
| 9 | + RunReportRequest, |
| 10 | +) |
| 11 | +from google.oauth2 import service_account |
| 12 | + |
def fetch_analytics_data():
    """Fetch yesterday's GA4 metrics and append them to ``data/ga4_stats.csv``.

    Configuration is read from two environment variables:

    * ``GA4_PROPERTY_ID`` - the property id, used as ``properties/<id>``.
    * ``GA4_SERVICE_ACCOUNT_JSON`` - the service-account key as a JSON string.

    One row per reported date is appended with the columns ``date``,
    ``activeUsers``, ``sessions``, ``screenPageViews`` and ``eventCount``.
    Dates already present in the first column of the CSV are skipped, so
    running the function more than once for the same day does not create
    duplicate rows.

    Raises:
        ValueError: if either environment variable is missing or empty, or
            if ``GA4_SERVICE_ACCOUNT_JSON`` is not valid JSON or does not
            decode to a JSON object.
        SystemExit: with status 1 if the report request fails.
    """
    # Local import: ``sys`` is not among the imports at the top of this file.
    import sys

    property_id = os.environ.get("GA4_PROPERTY_ID")
    service_account_json = os.environ.get("GA4_SERVICE_ACCOUNT_JSON")

    if not property_id:
        raise ValueError("Environment variable GA4_PROPERTY_ID must be set.")

    if not service_account_json:
        raise ValueError("Environment variable GA4_SERVICE_ACCOUNT_JSON must be set.")

    # Keep the try body minimal: only the JSON parse can raise JSONDecodeError.
    try:
        info = json.loads(service_account_json)
    except json.JSONDecodeError as err:
        raise ValueError("GA4_SERVICE_ACCOUNT_JSON is not valid JSON.") from err

    # Valid JSON can still be a list/number/string; reject it up front with
    # the same exception type callers already handle for bad configuration.
    if not isinstance(info, dict):
        raise ValueError("GA4_SERVICE_ACCOUNT_JSON must be a JSON object.")

    # Authenticate using the parsed key directly (no key file on disk).
    credentials = service_account.Credentials.from_service_account_info(info)
    client = BetaAnalyticsDataClient(credentials=credentials)

    print(f"Fetching data for property: {property_id}")

    # Define request - getting data for yesterday, one row per date.
    request = RunReportRequest(
        property=f"properties/{property_id}",
        dimensions=[Dimension(name="date")],
        metrics=[
            Metric(name="activeUsers"),
            Metric(name="sessions"),
            Metric(name="screenPageViews"),
            Metric(name="eventCount"),
        ],
        date_ranges=[DateRange(start_date="yesterday", end_date="yesterday")],
    )

    # Top-level boundary for the remote call: report the failure and exit
    # non-zero so a scheduler/CI job sees the run as failed.
    try:
        response = client.run_report(request)
    except Exception as e:
        print(f"Error fetching report: {e}", file=sys.stderr)
        sys.exit(1)

    output_dir = "data"
    os.makedirs(output_dir, exist_ok=True)
    csv_file = os.path.join(output_dir, "ga4_stats.csv")

    # A file that exists but is empty still needs its header row.
    needs_header = not os.path.isfile(csv_file) or os.path.getsize(csv_file) == 0

    # Dates already stored, used to avoid appending duplicates.
    existing_dates = _read_existing_dates(csv_file)

    with open(csv_file, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)

        if needs_header:
            writer.writerow(
                ["date", "activeUsers", "sessions", "screenPageViews", "eventCount"]
            )

        for row in response.rows:
            formatted_date = _format_report_date(row.dimension_values[0].value)

            if formatted_date in existing_dates:
                print(f"Skipping {formatted_date} - already exists in file.")
                continue

            # Metric values come back in the order requested above:
            # activeUsers, sessions, screenPageViews, eventCount.
            writer.writerow(
                [formatted_date] + [row.metric_values[i].value for i in range(4)]
            )
            # Remember the date so a repeated row in the same response is
            # skipped as well.
            existing_dates.add(formatted_date)
            print(f"Appended data for {formatted_date}")

    if not response.rows:
        print("No data found for yesterday.")


def _read_existing_dates(csv_file):
    """Return the first-column values of all data rows in *csv_file*.

    The first line is treated as a header and skipped. A missing or empty
    file yields an empty set.
    """
    if not os.path.isfile(csv_file):
        return set()

    with open(csv_file, mode="r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip header; the default avoids StopIteration.
        return {row[0] for row in reader if row}


def _format_report_date(date_str):
    """Insert dashes into a compact date string: ``20240131`` -> ``2024-01-31``.

    Assumes the report's ``date`` dimension is an 8-character ``YYYYMMDD``
    string; the value is only sliced, not validated.
    """
    return f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}"
| 101 | + |
# Run the export only when this file is executed as a script; importing the
# module must not trigger a network call or touch the CSV file.
if __name__ == "__main__":
    fetch_analytics_data()
0 commit comments