From 493af1bbdc8b846b630a1b0e2cedb2eb02ab7f4d Mon Sep 17 00:00:00 2001 From: Samyak Shah Date: Sun, 29 Sep 2024 18:36:14 -0400 Subject: [PATCH] 172 migrating from setuppy to pyprojecttoml (#173) * - Moved all project settings to pyproject.toml - Added a minimal setup.cfg for flake8 - Changed setup.py to minimal file for backwards compatibility - Added in flake8 and isort for more code consistency - Added pre-commit hooks for flake8 and isort * Updated readme file * Updated files and accounted for flake8 and isort issues - Updated files with isort for consistent imports - Fixed all flake8 issues - Changed imports to ensure they are accepted by flake8, and fixed circular dependencies * Removed placeholder emails from pyproject.toml --- .pre-commit-config.yaml | 9 +++ jobfunnel/__main__.py | 3 +- jobfunnel/backend/__init__.py | 2 + jobfunnel/backend/jobfunnel.py | 8 +-- jobfunnel/backend/scrapers/base.py | 2 +- jobfunnel/backend/scrapers/glassdoor.py | 5 +- jobfunnel/backend/scrapers/indeed.py | 14 ++-- jobfunnel/backend/scrapers/monster.py | 6 +- jobfunnel/backend/scrapers/registry.py | 23 ++++--- jobfunnel/backend/tools/__init__.py | 4 +- jobfunnel/backend/tools/delay.py | 2 +- jobfunnel/backend/tools/filters.py | 2 +- jobfunnel/backend/tools/tools.py | 4 +- jobfunnel/config/__init__.py | 19 +++++- jobfunnel/config/cli.py | 30 ++++++--- jobfunnel/config/manager.py | 5 +- jobfunnel/config/proxy.py | 2 +- jobfunnel/config/search.py | 7 +- jobfunnel/config/settings.py | 20 ++++-- jobfunnel/resources/__init__.py | 49 +++++++++++++- jobfunnel/resources/defaults.py | 5 +- jobfunnel/resources/resources.py | 3 +- pyproject.toml | 86 ++++++++++++++++++++++++- readme.md | 65 +++++++++++++++++++ setup.cfg | 20 ++++++ setup.py | 54 +--------------- tests/config/test_cli.py | 6 +- tests/conftest.py | 3 +- 28 files changed, 337 insertions(+), 121 deletions(-) create mode 100644 setup.cfg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ea10949..a6e68b43 
100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,3 +7,12 @@ repos: rev: "v3.1.0" # Specify Prettier version hooks: - id: prettier + - repo: https://github.com/pre-commit/mirrors-isort + rev: v5.10.1 + hooks: + - id: isort + args: ["--profile", "black"] + - repo: https://github.com/pycqa/flake8 + rev: 6.0.0 + hooks: + - id: flake8 diff --git a/jobfunnel/__main__.py b/jobfunnel/__main__.py index f592e8d5..43d32826 100755 --- a/jobfunnel/__main__.py +++ b/jobfunnel/__main__.py @@ -3,8 +3,9 @@ """ import os import sys + from .backend.jobfunnel import JobFunnel -from .config import parse_cli, build_config_dict, get_config_manager +from .config import build_config_dict, get_config_manager, parse_cli def main(): diff --git a/jobfunnel/backend/__init__.py b/jobfunnel/backend/__init__.py index 0188e570..93a9b2f9 100644 --- a/jobfunnel/backend/__init__.py +++ b/jobfunnel/backend/__init__.py @@ -1 +1,3 @@ from jobfunnel.backend.job import Job, JobStatus + +__all__ = ["Job", "JobStatus"] diff --git a/jobfunnel/backend/jobfunnel.py b/jobfunnel/backend/jobfunnel.py index 113cc8c7..c51312e1 100755 --- a/jobfunnel/backend/jobfunnel.py +++ b/jobfunnel/backend/jobfunnel.py @@ -3,27 +3,27 @@ """ import csv +from datetime import date, datetime, timedelta import json import os import pickle -from datetime import date, datetime, timedelta from time import time -from typing import Dict, List +from typing import Dict from requests import Session from jobfunnel import __version__ from jobfunnel.backend import Job from jobfunnel.backend.tools import Logger -from jobfunnel.backend.tools.filters import DuplicatedJob, JobFilter +from jobfunnel.backend.tools.filters import JobFilter from jobfunnel.config import JobFunnelConfigManager from jobfunnel.resources import ( CSV_HEADER, T_NOW, - Remoteness, DuplicateType, JobStatus, Locale, + Remoteness, ) diff --git a/jobfunnel/backend/scrapers/base.py b/jobfunnel/backend/scrapers/base.py index 28ba5d3f..01b2403d 100644 --- 
a/jobfunnel/backend/scrapers/base.py +++ b/jobfunnel/backend/scrapers/base.py @@ -2,10 +2,10 @@ Paul McInnis 2020 """ -import random from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor, as_completed from multiprocessing import Lock, Manager +import random from time import sleep from typing import Any, Dict, List, Optional diff --git a/jobfunnel/backend/scrapers/glassdoor.py b/jobfunnel/backend/scrapers/glassdoor.py index fa220ba4..2f6b916f 100644 --- a/jobfunnel/backend/scrapers/glassdoor.py +++ b/jobfunnel/backend/scrapers/glassdoor.py @@ -2,10 +2,10 @@ FIXME: this is currently unable to get past page 1 of job results. """ -import re from abc import abstractmethod from concurrent.futures import ThreadPoolExecutor, wait from math import ceil +import re from typing import Any, Dict, List, Tuple, Union from bs4 import BeautifulSoup @@ -15,10 +15,9 @@ from jobfunnel.backend.scrapers.base import ( BaseCANEngScraper, BaseScraper, - BaseUSAEngScraper, BaseUKEngScraper, + BaseUSAEngScraper, ) -from jobfunnel.backend.tools import get_webdriver from jobfunnel.backend.tools.filters import JobFilter from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str from jobfunnel.resources import MAX_CPU_WORKERS, JobField diff --git a/jobfunnel/backend/scrapers/indeed.py b/jobfunnel/backend/scrapers/indeed.py index b201c8f0..b3314524 100644 --- a/jobfunnel/backend/scrapers/indeed.py +++ b/jobfunnel/backend/scrapers/indeed.py @@ -1,13 +1,13 @@ """Scraper designed to get jobs from www.indeed.X """ -import re from concurrent.futures import ThreadPoolExecutor, wait +import json from math import ceil +import random +import re from typing import Any, Dict, List, Optional from unicodedata import normalize -import json -import random from bs4 import BeautifulSoup from requests import Session @@ -15,19 +15,19 @@ from jobfunnel.backend import Job from jobfunnel.backend.scrapers.base import ( BaseCANEngScraper, + BaseDEGerScraper, + 
BaseFRFreScraper, BaseScraper, - BaseUSAEngScraper, BaseUKEngScraper, - BaseFRFreScraper, - BaseDEGerScraper, + BaseUSAEngScraper, ) from jobfunnel.backend.tools.filters import JobFilter from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str from jobfunnel.resources import ( MAX_CPU_WORKERS, + USER_AGENT_LIST_MOBILE, JobField, Remoteness, - USER_AGENT_LIST_MOBILE, ) # pylint: disable=using-constant-test,unused-import diff --git a/jobfunnel/backend/scrapers/monster.py b/jobfunnel/backend/scrapers/monster.py index a92c9250..0a001b7e 100644 --- a/jobfunnel/backend/scrapers/monster.py +++ b/jobfunnel/backend/scrapers/monster.py @@ -1,9 +1,9 @@ """Scrapers for www.monster.X """ -import re from abc import abstractmethod from math import ceil +import re from typing import Any, Dict, List, Optional from bs4 import BeautifulSoup @@ -12,10 +12,10 @@ from jobfunnel.backend import Job from jobfunnel.backend.scrapers.base import ( BaseCANEngScraper, + BaseFRFreScraper, BaseScraper, - BaseUSAEngScraper, BaseUKEngScraper, - BaseFRFreScraper, + BaseUSAEngScraper, ) from jobfunnel.backend.tools.filters import JobFilter from jobfunnel.backend.tools.tools import calc_post_date_from_relative_str diff --git a/jobfunnel/backend/scrapers/registry.py b/jobfunnel/backend/scrapers/registry.py index 2e52d4ea..7e116aa3 100644 --- a/jobfunnel/backend/scrapers/registry.py +++ b/jobfunnel/backend/scrapers/registry.py @@ -4,26 +4,25 @@ TODO: there must be a better way to do this by using class attrib of Provider """ -from jobfunnel.resources import Locale, Provider - +from jobfunnel.backend.scrapers.glassdoor import ( + GlassDoorScraperCANEng, + GlassDoorScraperUKEng, + GlassDoorScraperUSAEng, +) from jobfunnel.backend.scrapers.indeed import ( IndeedScraperCANEng, - IndeedScraperUSAEng, - IndeedScraperUKEng, - IndeedScraperFRFre, IndeedScraperDEGer, + IndeedScraperFRFre, + IndeedScraperUKEng, + IndeedScraperUSAEng, ) from jobfunnel.backend.scrapers.monster import ( 
MonsterScraperCANEng, - MonsterScraperUSAEng, - MonsterScraperUKEng, MonsterScraperFRFre, + MonsterScraperUKEng, + MonsterScraperUSAEng, ) -from jobfunnel.backend.scrapers.glassdoor import ( - GlassDoorScraperCANEng, - GlassDoorScraperUSAEng, - GlassDoorScraperUKEng, -) +from jobfunnel.resources import Locale, Provider SCRAPER_FROM_LOCALE = { # search terms which one to use diff --git a/jobfunnel/backend/tools/__init__.py b/jobfunnel/backend/tools/__init__.py index ef7d46bd..1287b303 100644 --- a/jobfunnel/backend/tools/__init__.py +++ b/jobfunnel/backend/tools/__init__.py @@ -1,3 +1,5 @@ -from jobfunnel.backend.tools.tools import get_webdriver, get_logger, Logger +from jobfunnel.backend.tools.tools import Logger, get_logger, get_webdriver + +__all__ = ["get_webdriver", "get_logger", "Logger"] # NOTE: we can't import delays here or we cause circular import. diff --git a/jobfunnel/backend/tools/delay.py b/jobfunnel/backend/tools/delay.py index a4346117..7e0f60fe 100644 --- a/jobfunnel/backend/tools/delay.py +++ b/jobfunnel/backend/tools/delay.py @@ -8,7 +8,7 @@ from numpy import arange from scipy.special import expit # pylint: disable=no-name-in-module -from jobfunnel.config import DelayConfig +from jobfunnel.config.delay import DelayConfig from jobfunnel.resources import DelayAlgorithm diff --git a/jobfunnel/backend/tools/filters.py b/jobfunnel/backend/tools/filters.py index e107b17d..1e8f36cd 100644 --- a/jobfunnel/backend/tools/filters.py +++ b/jobfunnel/backend/tools/filters.py @@ -3,10 +3,10 @@ Paul McInnis 2020 """ -import logging from collections import namedtuple from copy import deepcopy from datetime import datetime +import logging from typing import Dict, List, Optional, Tuple import nltk diff --git a/jobfunnel/backend/tools/tools.py b/jobfunnel/backend/tools/tools.py index a198d160..12e469c8 100644 --- a/jobfunnel/backend/tools/tools.py +++ b/jobfunnel/backend/tools/tools.py @@ -1,10 +1,10 @@ """Assorted tools for all aspects of funnelin' that don't fit 
elsewhere """ +from datetime import date, datetime, timedelta import logging import re import sys -from datetime import date, datetime, timedelta from typing import Optional from dateutil.relativedelta import relativedelta @@ -14,8 +14,6 @@ from webdriver_manager.microsoft import EdgeChromiumDriverManager, IEDriverManager from webdriver_manager.opera import OperaDriverManager -from jobfunnel.backend import Job - # Initialize list and store regex objects of date quantifiers HOUR_REGEX = re.compile(r"(\d+)(?:[ +]{1,3})?(?:hour|hr|heure)") DAY_REGEX = re.compile(r"(\d+)(?:[ +]{1,3})?(?:day|d|jour)") diff --git a/jobfunnel/config/__init__.py b/jobfunnel/config/__init__.py index d742dc4b..59f75208 100644 --- a/jobfunnel/config/__init__.py +++ b/jobfunnel/config/__init__.py @@ -1,7 +1,20 @@ -from jobfunnel.config.settings import SettingsValidator, SETTINGS_YAML_SCHEMA from jobfunnel.config.base import BaseConfig +from jobfunnel.config.cli import build_config_dict, get_config_manager, parse_cli from jobfunnel.config.delay import DelayConfig +from jobfunnel.config.manager import JobFunnelConfigManager from jobfunnel.config.proxy import ProxyConfig from jobfunnel.config.search import SearchConfig -from jobfunnel.config.manager import JobFunnelConfigManager -from jobfunnel.config.cli import parse_cli, get_config_manager, build_config_dict +from jobfunnel.config.settings import SETTINGS_YAML_SCHEMA, SettingsValidator + +__all__ = [ + "SettingsValidator", + "SETTINGS_YAML_SCHEMA", + "BaseConfig", + "DelayConfig", + "ProxyConfig", + "SearchConfig", + "JobFunnelConfigManager", + "parse_cli", + "get_config_manager", + "build_config_dict", +] diff --git a/jobfunnel/config/cli.py b/jobfunnel/config/cli.py index 1683a93b..b8224713 100644 --- a/jobfunnel/config/cli.py +++ b/jobfunnel/config/cli.py @@ -2,16 +2,15 @@ """ import argparse -from typing import Dict, Any, List +from typing import Any, Dict, List + import yaml -from jobfunnel.config import ( - DelayConfig, - 
JobFunnelConfigManager, - ProxyConfig, - SearchConfig, - SettingsValidator, -) +from jobfunnel.config.delay import DelayConfig +from jobfunnel.config.manager import JobFunnelConfigManager +from jobfunnel.config.proxy import ProxyConfig +from jobfunnel.config.search import SearchConfig +from jobfunnel.config.settings import SettingsValidator from jobfunnel.resources import ( LOG_LEVEL_NAMES, DelayAlgorithm, @@ -19,7 +18,17 @@ Provider, Remoteness, ) -from jobfunnel.resources.defaults import * +from jobfunnel.resources.defaults import ( + DEFAULT_COMPANY_BLOCK_LIST, + DEFAULT_DELAY_ALGORITHM, + DEFAULT_DELAY_MAX_DURATION, + DEFAULT_DELAY_MIN_DURATION, + DEFAULT_LOG_LEVEL_NAME, + DEFAULT_MAX_LISTING_DAYS, + DEFAULT_PROVIDER_NAMES, + DEFAULT_REMOTENESS, + DEFAULT_SEARCH_RADIUS, +) def parse_cli(args: List[str]) -> Dict[str, Any]: @@ -153,7 +162,7 @@ def parse_cli(args: List[str]) -> Dict[str, Any]: "-l", dest="search.locale", type=str, - choices=[l.name for l in Locale], + choices=[locale.name for locale in Locale], help="Global location and language to use to scrape the job provider" " website (i.e. -l CANADA_ENGLISH -p indeed --> indeed.ca).", required=True, @@ -305,6 +314,7 @@ def build_config_dict(args_dict: Dict[str, Any]) -> Dict[str, Any]: """Parse the JobFunnel configuration settings and combine CLI, YAML and defaults to build a valid config dictionary for initializing config objects. 
""" + # Build a config that respects CLI, defaults and YAML # NOTE: we a passed settings YAML first so we can inject CLI after if needed if "settings_yaml_file" in args_dict: diff --git a/jobfunnel/config/manager.py b/jobfunnel/config/manager.py index 7d9a8c42..aed6e29b 100644 --- a/jobfunnel/config/manager.py +++ b/jobfunnel/config/manager.py @@ -6,7 +6,10 @@ from typing import List, Optional from jobfunnel.backend.scrapers.registry import SCRAPER_FROM_LOCALE -from jobfunnel.config import BaseConfig, DelayConfig, ProxyConfig, SearchConfig +from jobfunnel.config.base import BaseConfig +from jobfunnel.config.delay import DelayConfig +from jobfunnel.config.proxy import ProxyConfig +from jobfunnel.config.search import SearchConfig from jobfunnel.resources import BS4_PARSER # pylint: disable=using-constant-test,unused-import diff --git a/jobfunnel/config/proxy.py b/jobfunnel/config/proxy.py index a75ad665..f5ed4537 100644 --- a/jobfunnel/config/proxy.py +++ b/jobfunnel/config/proxy.py @@ -25,7 +25,7 @@ def validate(self) -> None: try: # try to create an IPv4 address ipaddress.IPv4Address(self.ip_address) - except: + except Exception: raise ValueError(f"{self.ip_address} is not a valid IPv4 address") assert isinstance(self.port, int), "Port must be an integer" assert self.protocol, "Protocol is not set" diff --git a/jobfunnel/config/search.py b/jobfunnel/config/search.py index c207eabe..e98cf563 100644 --- a/jobfunnel/config/search.py +++ b/jobfunnel/config/search.py @@ -2,12 +2,13 @@ """ from typing import List, Optional + from jobfunnel.config import BaseConfig from jobfunnel.resources import Locale, Provider, Remoteness from jobfunnel.resources.defaults import ( - DEFAULT_SEARCH_RADIUS, - DEFAULT_MAX_LISTING_DAYS, DEFAULT_DOMAIN_FROM_LOCALE, + DEFAULT_MAX_LISTING_DAYS, + DEFAULT_SEARCH_RADIUS, ) @@ -65,7 +66,7 @@ def __init__( # Try to infer the domain string based on the locale. 
if not domain: - if not self.locale in DEFAULT_DOMAIN_FROM_LOCALE: + if self.locale not in DEFAULT_DOMAIN_FROM_LOCALE: raise ValueError(f"Unknown domain for locale: {self.locale}") self.domain = DEFAULT_DOMAIN_FROM_LOCALE[self.locale] else: diff --git a/jobfunnel/config/settings.py b/jobfunnel/config/settings.py index 52fedcf5..110b7171 100644 --- a/jobfunnel/config/settings.py +++ b/jobfunnel/config/settings.py @@ -12,8 +12,20 @@ Provider, Remoteness, ) -from jobfunnel.resources.defaults import * - +from jobfunnel.resources.defaults import ( + DEFAULT_COMPANY_BLOCK_LIST, + DEFAULT_DELAY_ALGORITHM, + DEFAULT_DELAY_MAX_DURATION, + DEFAULT_DELAY_MIN_DURATION, + DEFAULT_LOG_LEVEL_NAME, + DEFAULT_MAX_LISTING_DAYS, + DEFAULT_PROVIDERS, + DEFAULT_RANDOM_CONVERGING_DELAY, + DEFAULT_RANDOM_DELAY, + DEFAULT_REMOTENESS, + DEFAULT_RETURN_SIMILAR_RESULTS, + DEFAULT_SEARCH_RADIUS, +) SETTINGS_YAML_SCHEMA = { "master_csv_file": { @@ -57,7 +69,7 @@ }, "locale": { "required": True, - "allowed": [l.name for l in Locale], + "allowed": [locale.name for locale in Locale], }, "province_or_state": {"required": True, "type": "string"}, "city": {"required": True, "type": "string"}, @@ -167,7 +179,7 @@ def _validate_type_ipv4address(self, value): # module ipaddress.IPv4Address(value) return True - except: + except Exception: self._error(value, "Not a valid IPv4 address") diff --git a/jobfunnel/resources/__init__.py b/jobfunnel/resources/__init__.py index 5f0c1620..21ae220e 100644 --- a/jobfunnel/resources/__init__.py +++ b/jobfunnel/resources/__init__.py @@ -1,2 +1,47 @@ -from jobfunnel.resources.resources import * -from jobfunnel.resources.enums import * +from jobfunnel.resources.enums import ( + DelayAlgorithm, + DuplicateType, + JobField, + JobStatus, + Locale, + Provider, + Remoteness, +) +from jobfunnel.resources.resources import ( + BS4_PARSER, + CSV_HEADER, + DEFAULT_MAX_TFIDF_SIMILARITY, + LOG_LEVEL_NAMES, + MAX_BLOCK_LIST_DESC_CHARS, + MAX_CPU_WORKERS, + MIN_DESCRIPTION_CHARS, + 
MIN_JOBS_TO_PERFORM_SIMILARITY_SEARCH, + PRINTABLE_STRINGS, + T_NOW, + USER_AGENT_LIST, + USER_AGENT_LIST_MOBILE, + load_user_agents, +) + +__all__ = [ + "CSV_HEADER", + "LOG_LEVEL_NAMES", + "MIN_DESCRIPTION_CHARS", + "MAX_CPU_WORKERS", + "MIN_JOBS_TO_PERFORM_SIMILARITY_SEARCH", + "MAX_BLOCK_LIST_DESC_CHARS", + "DEFAULT_MAX_TFIDF_SIMILARITY", + "BS4_PARSER", + "T_NOW", + "PRINTABLE_STRINGS", + "load_user_agents", + "USER_AGENT_LIST", + "USER_AGENT_LIST_MOBILE", + "Locale", + "JobStatus", + "JobField", + "Remoteness", + "DuplicateType", + "Provider", + "DelayAlgorithm", +] diff --git a/jobfunnel/resources/defaults.py b/jobfunnel/resources/defaults.py index ad39e682..c026b165 100644 --- a/jobfunnel/resources/defaults.py +++ b/jobfunnel/resources/defaults.py @@ -2,10 +2,7 @@ NOTE: Not all defaults here are used, as we rely on YAML for demo and not kwargs """ -import os -from pathlib import Path -from jobfunnel.resources.enums import Locale, DelayAlgorithm, Provider, Remoteness - +from jobfunnel.resources.enums import DelayAlgorithm, Locale, Provider, Remoteness DEFAULT_LOG_LEVEL_NAME = "INFO" DEFAULT_LOCALE = Locale.CANADA_ENGLISH diff --git a/jobfunnel/resources/resources.py b/jobfunnel/resources/resources.py index 03ca6e7a..bc278a6f 100644 --- a/jobfunnel/resources/resources.py +++ b/jobfunnel/resources/resources.py @@ -2,9 +2,8 @@ """ import datetime -import os -import string from pathlib import Path +import string # CSV header for output CSV. do not remove anything or you'll break usr's CSV's # TODO: need to add short and long descriptions (breaking change) diff --git a/pyproject.toml b/pyproject.toml index ea756dfe..2afdbf45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,87 @@ +[build-system] +requires = ["setuptools>=75.1.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "JobFunnel" +dynamic = ["version"] +description = "Automated tool for scraping job postings." 
+readme = "readme.md" +requires-python = ">=3.11" +license = {text = "MIT License"} +authors = [ + { name = "Paul McInnis", email = "paulmcinnis99@gmail.com" }, + { name = "Bradley Kohler" }, + { name = "Jose Alarcon" }, + { name = "Erich Mengore" }, + { name = "Mark van der Broek" }, +] +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", +] +dependencies = [ + "beautifulsoup4>=4.6.3", + "lxml>=4.2.4", + "requests>=2.19.1", + "python-dateutil>=2.8.0", + "PyYAML>=5.1", + "scikit-learn>=0.21.2", + "nltk>=3.4.1", + "scipy>=1.4.1", + "selenium>=3.141.0", + "webdriver-manager>=2.4.0", + "Cerberus>=1.3.2", + "tqdm>=4.47.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=5.3.1", + "pytest-mock>=3.1.1", + "pytest-cov", + "flake8", + "isort>=5.10.1", + "black>=24.8.0", + "pre-commit>=3.8.0", +] + +[project.urls] +Homepage = "https://github.com/PaulMcInnis/JobFunnel" +Documentation = "https://github.com/PaulMcInnis/JobFunnel/docs" +Repository = "https://github.com/PaulMcInnis/JobFunnel" +Issues = "https://github.com/PaulMcInnis/JobFunnel/issues" + +[project.scripts] +funnel = "jobfunnel.__main__:main" + +[tool.setuptools.dynamic] +version = {attr = "jobfunnel.__version__"} + +[tool.setuptools.packages.find] +exclude = ["tests", "docs", "images"] + +[tool.setuptools] +include-package-data = true + +[tool.pytest.ini_options] +addopts = "--cov=jobfunnel" + [tool.black] -line-length = 88 # This is the default +line-length = 88 include = '\.pyi?$' # This will include all .py and .pyi files + +[tool.isort] +profile = "black" +line_length = 88 +known_third_party = ["beautifulsoup4", "lxml", "requests", "python-dateutil", "PyYAML", "scikit-learn", "nltk", "scipy", "selenium", "webdriver-manager", "Cerberus", "tqdm"] +known_first_party = ["jobfunnel"] +default_section = "THIRDPARTY" +force_sort_within_sections = true +multi_line_output = 3 +include_trailing_comma = true + 
+[tool.flake8] +max-line-length = 120 +extend-ignore = "E203" \ No newline at end of file diff --git a/readme.md b/readme.md index e66e4098..6dd58e59 100644 --- a/readme.md +++ b/readme.md @@ -102,6 +102,71 @@ Open the master CSV file and update the per-job `status`: `Unable to extract jobs from initial search result page:\` error. Then open that url on your browser and solve the CAPTCHA manually. +# Developer Guide + +For contributors and developers who want to work on JobFunnel, this section will guide you through setting up the development environment and the tools we use to maintain code quality and consistency. + +## Developer Mode Installation + +To get started, install JobFunnel in **developer mode**. This will install all necessary dependencies, including development tools such as testing, linting, and formatting utilities. + +To install JobFunnel in developer mode, use the following command: + +```bash +pip install -e '.[dev]' +``` + +This command installs the package in an editable state along with the development tools (including the `pre-commit` package) used for automatic code quality checks; the hooks themselves are enabled in a separate step, described below. + +## Pre-Commit Hooks + +The following pre-commit hooks are configured to run automatically when you commit changes to ensure the code follows consistent style and quality guidelines: + +- `Black`: Automatically formats Python code to ensure consistency. +- `isort`: Sorts and organizes imports according to the Black style. +- `Prettier`: Formats non-Python files such as YAML and JSON. +- `Flake8`: Checks Python code for style guide violations. + +While the pre-commit package is installed when you run `pip install -e '.[dev]'`, you still need to initialize the hooks by running the following command once: + +```bash +pre-commit install +``` + +### How Pre-Commit Hooks Work + +The pre-commit hooks will automatically run when you attempt to make a commit. If any formatting issues are found, the hooks will fix them (for Black and isort), or warn you about style violations (for Flake8). 
This ensures that all committed code meets the project’s quality standards. + +You can also manually run the pre-commit hooks at any time with: + +```bash +pre-commit run --all-files +``` + +This is useful to check the entire codebase before committing or as part of a larger code review. Please fix all style guide violations (or provide a reason to ignore) before committing to the repository. + +## Running Tests + +We use `pytest` to run tests and ensure that the code behaves as expected. Code coverage is automatically generated every time you run the tests. + +To run all tests, use the following command: + +```bash +pytest +``` + +This will execute the test suite and automatically generate a code coverage report. + +If you want to see a detailed code coverage report, you can run: + +```bash +pytest --cov-report=term-missing +``` + +This will display which lines of code were missed in the test coverage directly in your terminal output. + + + [requirements]:requirements.txt [masterlist]:demo/demo.png "masterlist.csv" diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..5ed25eb8 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,20 @@ +# setup.cfg + +[metadata] +# Section left intentionally empty due to pyproject.toml + +[options] +# Section left intentionally empty due to pyproject.toml + +[options.packages.find] +exclude = + tests + docs + images + +[tool.setuptools] +include-package-data = true + +[flake8] +max-line-length = 120 +extend-ignore = E203 \ No newline at end of file diff --git a/setup.py b/setup.py index a9ef2c90..7f1a1763 100644 --- a/setup.py +++ b/setup.py @@ -1,52 +1,4 @@ -from pathlib import Path -from setuptools import setup, find_packages +from setuptools import setup -from jobfunnel import __version__ as version - - -description = "Automated tool for scraping job postings." 
-url = "https://github.com/PaulMcInnis/JobFunnel" -requires = [ - "beautifulsoup4>=4.6.3", - "lxml>=4.2.4", - "requests>=2.19.1", - "python-dateutil>=2.8.0", - "PyYAML>=5.1", - "scikit-learn>=0.21.2", - "nltk>=3.4.1", - "scipy>=1.4.1", - "pytest>=5.3.1", - "pytest-mock>=3.1.1", - "selenium>=3.141.0", - "webdriver-manager>=2.4.0", - "Cerberus>=1.3.2", - "tqdm>=4.47.0", - "flake8", - "pipenv", - "pytest-cov", -] -here = Path(__file__).parent -readme = (here / "readme.md").read_text() - -setup( - name="JobFunnel", - version=version, - description=description, - long_description=readme, - long_description_content_type="text/markdown", - author="Paul McInnis, Bradley Kohler, Jose Alarcon, Erich Mengore, " - "Mark van der Broek", - author_email="paulmcinnis99@gmail.com", - url=url, - license="MIT License", - python_requires=">=3.11", - install_requires=requires, - packages=find_packages(exclude=("tests", "docs", "images")), - include_package_data=True, - entry_points={"console_scripts": ["funnel = jobfunnel.__main__:main"]}, - classifiers=[ - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.11", - ], -) +if __name__ == "__main__": + setup() diff --git a/tests/config/test_cli.py b/tests/config/test_cli.py index 893b85b3..9b772e3b 100644 --- a/tests/config/test_cli.py +++ b/tests/config/test_cli.py @@ -2,8 +2,10 @@ """ import os + import pytest -from jobfunnel.config import parse_cli, build_config_dict + +from jobfunnel.config import build_config_dict, parse_cli from tests.conftest import get_data_path TEST_YAML = os.path.join(get_data_path(), "test_config.yml") @@ -181,6 +183,7 @@ def test_parse_cli_load(argv): def test_parse_cli_invalid_args(argv, exception): with pytest.raises(exception) as e: args = parse_cli(argv) + assert args is not None # TODO: Remove after test is fixed assert str(e.value) == "2" @@ -189,6 +192,7 @@ def test_build_config_dict_invalid_settings(argv, exception): args = 
parse_cli(argv) with pytest.raises(exception) as e: cfg_dict = build_config_dict(args) + assert cfg_dict is not None # TODO: Remove after test is fixed assert ( str(e.value) == "Invalid Config settings yaml:\n" "{'search': [{'radius': ['must be of integer type']}]}" diff --git a/tests/conftest.py b/tests/conftest.py index 72ea47c1..4a8343d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ -import pytest import os +import pytest # noqa: F401 - TODO: Remove this once we have tests + # TODO: This should be a fixture. For now it is not because fixtures cannot be easily called as regular functions. def get_data_path():