Merge pull request #91 from ElemarJR/last_twelve_months_view
Last twelve months view
ElemarJR authored Jan 3, 2025
2 parents 8458989 + 772f372 commit 2f675cc
Showing 31 changed files with 1,050 additions and 96 deletions.
3 changes: 2 additions & 1 deletion backend/api/src/operational_summaries/__init__.py
@@ -1,7 +1,8 @@
from ariadne import QueryType, ObjectType

from .staleliness import resolve_staleliness

from .allocation import resolve_allocation
def setup_query_for_operational_summaries(query: QueryType):
    query.set_field('staleliness', resolve_staleliness)
    query.set_field('allocation', resolve_allocation)
    return []
49 changes: 49 additions & 0 deletions backend/api/src/operational_summaries/allocation.py
@@ -0,0 +1,49 @@
from datetime import datetime

from omni_shared import globals

def resolve_allocation(root, info, start_date=None, end_date=None, filters=None):
    # Process dates
    def parse_date(date_input):
        if date_input is None:
            return datetime.now()
        if isinstance(date_input, str):
            return datetime.strptime(date_input, '%Y-%m-%d')
        return date_input

    start_date = parse_date(start_date)
    end_date = parse_date(end_date)

    # Get and filter timesheet data
    timesheet = globals.omni_datasets.timesheets.get(start_date, end_date)
    df, result = globals.omni_datasets.apply_filters(
        globals.omni_datasets.timesheets,
        timesheet.data,
        filters
    )

    # Define kind mappings
    kind_map = {
        'Consulting': 'consulting',
        'Internal': 'internal',
        'HandsOn': 'hands_on',
        'Squad': 'squad'
    }

    # Initialize structure with empty lists for each kind
    by_kind = {normalized: [] for normalized in kind_map.values()}

    # Process allocations in one pass
    daily_allocation = df.groupby(['Date', 'Kind'])['TimeInHs'].sum()

    for (date, kind), hours in daily_allocation.items():
        if normalized_kind := kind_map.get(kind):
            by_kind[normalized_kind].append({
                'date': date.strftime('%Y-%m-%d'),
                'hours': float(hours)
            })

    return {
        'by_kind': by_kind,
        'filterable_fields': result['filterable_fields']
    }
17 changes: 17 additions & 0 deletions backend/api/src/operational_summaries/schema.graphql
@@ -18,3 +18,20 @@ type StalenessWorker {
  name: String!
  slug: String!
}

type Allocation {
  byKind: AllocationByKind!
  filterableFields: [FilterableField!]!
}

type AllocationByKind {
  consulting: [AllocationEntry!]!
  internal: [AllocationEntry!]!
  handsOn: [AllocationEntry!]!
  squad: [AllocationEntry!]!
}

type AllocationEntry {
  date: Date!
  hours: Float!
}
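
For context, these types back the new allocation field added to the Query type in backend/api/src/schema.graphql below. A minimal client query might look like the following sketch (illustrative only, not code from this repository; it assumes the Date scalar accepts '%Y-%m-%d' strings, matching the parsing done in the resolver above):

query {
  allocation(startDate: "2024-01-01", endDate: "2024-12-31") {
    byKind {
      consulting {
        date
        hours
      }
      squad {
        date
        hours
      }
    }
  }
}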
1 change: 1 addition & 0 deletions backend/api/src/schema.graphql
@@ -66,6 +66,7 @@ type Query {
  yearlyForecast(year: Int): YearlyForecast!

  inConsulting(dateOfInterest: Date): InConsulting!
  allocation(startDate: Date, endDate: Date, filters: [FilterInput]): Allocation!
}

type CacheItem {
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .main import TimesheetDataset
from .models.memory_cache import TimesheetMemoryCache

__all__ = ['TimesheetDataset', 'TimesheetMemoryCache']
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import logging
from datetime import datetime, timedelta
from typing import List

import pandas as pd
import numpy as np
import os
from pathlib import Path

from omni_utils.decorators.cache import cache
from omni_models.base.powerdataframe import SummarizablePowerDataFrame
@@ -12,74 +12,25 @@
from omni_utils.helpers.slug import slugify
from omni_models.omnimodels import OmniModels

class TimesheetMemoryCache:
    def __init__(self):
        self.cache = []

    def get(self, after: datetime, before: datetime) -> SummarizablePowerDataFrame:
        for m in self.cache:
            if m['after'] <= after and m['before'] >= before:
                df = m['result'].data
                df = df[df['Date'] >= after.date()]
                df = df[df['Date'] <= before.date()]
                return SummarizablePowerDataFrame(df)
        return None

    def add(self, after: datetime, before: datetime, result: SummarizablePowerDataFrame):
        self.cache.append({
            "after": after,
            "before": before,
            "result": result,
            "created_at": datetime.now()
        })
import calendar

    def list_cache(self, after, before):
        if after:
            if isinstance(after, str):
                after = datetime.strptime(after, '%Y-%m-%d').date()
            elif isinstance(after, datetime):
                after = after.date()

        if before:
            if isinstance(before, str):
                before = datetime.strptime(before, '%Y-%m-%d').date()
            elif isinstance(before, datetime):
                before = before.date()

        return [
            {
                "after": m['after'],
                "before": m['before'],
                "created_at": m['created_at']
            }
            for m in self.cache
            if (after is None or after >= m['after']) and (before is None or before <= m['before'])
        ]

    def invalidate(self, after, before):
        if after:
            if isinstance(after, str):
                after = datetime.strptime(after, '%Y-%m-%d').date()
            elif isinstance(after, datetime):
                after = after.date()

        if before:
            if isinstance(before, str):
                before = datetime.strptime(before, '%Y-%m-%d').date()
            elif isinstance(before, datetime):
                before = before.date()

        self.cache = [
            m
            for m in self.cache
            if (after is None or after >= m['after']) and (before is None or before <= m['before'])
        ]
from .models.memory_cache import TimesheetMemoryCache
from .models.disk_cache import TimesheetDiskCache

class TimesheetDataset(OmniDataset):
    def __init__(self, models: OmniModels = None):
        self.models = models or OmniModels()
        self.logger = logging.getLogger(self.__class__.__name__)
        self.memory = TimesheetMemoryCache()

        api_key = os.getenv('EVERHOUR_API_KEY')
        if not api_key:
            raise ValueError("EVERHOUR_API_KEY environment variable is required")

        cache_dir = Path("ts_2024")
        self.disk = TimesheetDiskCache(cache_dir, api_key)

        self._ensure_2024()

    def get_treemap_path(self):
        return 'TimeInHs', ['Kind', 'ClientName', 'WorkerName']
@@ -89,7 +40,22 @@ def get_filterable_fields(self):

    @cache
    def get(self, after: datetime, before: datetime) -> SummarizablePowerDataFrame:
        first_day_of_month = after.replace(day=1)
        df = pd.DataFrame()

        while first_day_of_month < before:
            last_day_of_month = first_day_of_month.replace(day=calendar.monthrange(first_day_of_month.year, first_day_of_month.month)[1])
            result = self._get(first_day_of_month, last_day_of_month)
            df = pd.concat([df, result.data])

            first_day_of_month = last_day_of_month + timedelta(days=1)

        df = df[df['Date'] >= after.date()]
        df = df[df['Date'] <= before.date()]

        return SummarizablePowerDataFrame(df)

    def _get(self, after: datetime, before: datetime) -> SummarizablePowerDataFrame:
        result = self.memory.get(after, before)
        if result:
            self.logger.info(f"Getting appointments from cache from {after} to {before}.")
@@ -103,12 +69,10 @@ def get(self, after: datetime, before: datetime) -> SummarizablePowerDataFrame:

data = [ap.to_dict() for ap in raw]
df = pd.DataFrame(data)



start_time = datetime.now()
self.logger.info(f"Enriching timesheet data")


# Check if df is empty
if df.empty:
return SummarizablePowerDataFrame(pd.DataFrame())
@@ -305,7 +269,6 @@ def get_all_fields(self):
            'CaseTitle',
            'Sponsor',
            'SponsorSlug',
            #'Case',
            'ClientId',
            'ClientName',
            'ClientSlug',
@@ -330,4 +293,37 @@ def get_last_four_weeks_ltes(self) -> SummarizablePowerDataFrame:
            .filter_by(by='CreatedAtWeek', not_equals_to=previous6)
        )

        return data
        return data

    def _ensure_2024(self):
        """Ensures all 2024 timesheet data is cached to disk."""
        months = {
            'jan': '01', 'fev': '02', 'mar': '03', 'abr': '04',
            'mai': '05', 'jun': '06', 'jul': '07', 'ago': '08',
            'set': '09', 'out': '10', 'nov': '11', 'dez': '12'
        }

        self.logger.info("Ensuring 2024 timesheet data is cached...")

        for month_name, month_num in months.items():
            filename = f"{month_name}_2024"
            s = datetime(2024, int(month_num), 1, 0, 0, 0)
            e = datetime(2024, int(month_num), calendar.monthrange(2024, int(month_num))[1], 23, 59, 59)

            # Check if month data is already cached
            cached_data = self.disk.load(filename)
            if cached_data is not None:
                self.memory.add(s, e, cached_data)
                self.logger.info(f"Month {month_name}_2024 already cached")
                continue

            # If not cached, fetch from API
            self.logger.info(f"Fetching {month_name}_2024 from API...")
            dataset = self.get(s, e)

            if dataset is not None:
                self.logger.info(f"Saving {month_name}_2024 to disk cache...")
                self.disk.save(dataset, filename)
                self.memory.add(s, e, dataset)
            else:
                self.logger.warning(f"No data available for {month_name}_2024")
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pickle
import base64
from pathlib import Path
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

class TimesheetDiskCache:
    def __init__(self, cache_dir: str, api_key: str):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.fernet = self._get_encryption_key(api_key)

    def _get_encryption_key(self, api_key: str) -> Fernet:
        salt = b'omni_salt'
        kdf = PBKDF2HMAC(
            algorithm=hashes.SHA256(),
            length=32,
            salt=salt,
            iterations=100000,
        )
        key = base64.urlsafe_b64encode(kdf.derive(api_key.encode()))
        return Fernet(key)

    def save(self, dataset, filename: str) -> None:
        """Save an encrypted timesheet dataset to file"""
        if dataset is None:
            return

        filepath = self.cache_dir / f"{filename}.timesheet"

        # Serialize and encrypt the dataset
        serialized = pickle.dumps(dataset)
        encrypted = self.fernet.encrypt(serialized)

        # Save to file
        with open(filepath, "wb") as file:
            file.write(encrypted)

    def load(self, filename: str):
        """Load an encrypted timesheet dataset from file"""
        try:
            filepath = self.cache_dir / f"{filename}.timesheet"

            # Read encrypted data
            with open(filepath, "rb") as file:
                encrypted = file.read()

            # Decrypt and deserialize
            decrypted = self.fernet.decrypt(encrypted)
            return pickle.loads(decrypted)
        except:
            return None


