diff --git a/docs/lahman.md b/docs/lahman.md
index 8f219f69..5f094088 100644
--- a/docs/lahman.md
+++ b/docs/lahman.md
@@ -1,10 +1,10 @@
 # Lahman Data Acquisition Functions
 
-Pull data from [Sean Lahman's database](http://www.seanlahman.com/baseball-archive/statistics/), also hosted by [Chadwick Bureau on GitHub](https://github.com/chadwickbureau/baseballdatabank) -- our new source -- using the following functions:
+Pulls data from [Sean Lahman's database](http://seanlahman.com/), now hosted on Dropbox, using the following functions:
 
 ```python
 from pybaseball.lahman import *
-download_lahman() #download the entire lahman database to your current working directory
+download_lahman()  # download the entire Lahman database into the pybaseball cache directory
 
 # a table of all player biographical info and ids
 people = people()
@@ -81,7 +81,7 @@ schools = schools()
 series_post = series_post()
 
 # data on teams by year: record, division, stadium, attendance, etc
-teams = teams()
+teams = teams_core()
 
 # current and historical franchises, whether they're still active, and their ids
 teams_franchises = teams_franchises()
diff --git a/pybaseball/__init__.py b/pybaseball/__init__.py
index cc223855..852c0220 100644
--- a/pybaseball/__init__.py
+++ b/pybaseball/__init__.py
@@ -79,7 +79,6 @@
 from .lahman import schools
 from .lahman import series_post
 from .lahman import teams_core
-from .lahman import teams_upstream
 from .lahman import teams_franchises
 from .lahman import teams_half
 from .lahman import download_lahman
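For context, the documented entry points behave as sketched below after this change. This is a minimal usage example based only on the updated docs/lahman.md above; `teams_core()` replaces the removed `teams()` wrapper.

```python
# Minimal usage sketch based on the updated docs/lahman.md above.
from pybaseball.lahman import download_lahman, people, teams_core

download_lahman()     # fetch and extract the Lahman archive into the pybaseball cache
bios = people()       # player biographical info and ids
teams = teams_core()  # teams by year: record, division, stadium, attendance, etc.
print(bios.shape, teams.shape)
```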
diff --git a/pybaseball/lahman.py b/pybaseball/lahman.py
index 437096eb..0d7c0ab9 100644
--- a/pybaseball/lahman.py
+++ b/pybaseball/lahman.py
@@ -1,136 +1,161 @@
+from datetime import timedelta
 from io import BytesIO
+from os import makedirs
 from os import path
-from typing import Optional
-from zipfile import ZipFile
 
+from bs4 import BeautifulSoup
 import pandas as pd
+from pathlib import Path
+from py7zr import SevenZipFile
 import requests
+from requests_cache import CachedSession
 
 from . import cache
 
-url = "https://github.com/chadwickbureau/baseballdatabank/archive/master.zip"
-base_string = "baseballdatabank-master"
-
-_handle = None
-
-def get_lahman_zip() -> Optional[ZipFile]:
-    # Retrieve the Lahman database zip file, returns None if file already exists in cwd.
-    # If we already have the zip file, keep re-using that.
-    # Making this a function since everything else will be re-using these lines
-    global _handle
-    if path.exists(path.join(cache.config.cache_directory, base_string)):
-        _handle = None
-    elif not _handle:
-        s = requests.get(url, stream=True)
-        _handle = ZipFile(BytesIO(s.content))
-    return _handle
-
-def download_lahman():
-    # download entire lahman db to present working directory
-    z = get_lahman_zip()
-    if z is not None:
-        z.extractall(cache.config.cache_directory)
-        z = get_lahman_zip()
-        # this way we'll now start using the extracted zip directory
-        # instead of the session ZipFile object
-
-def _get_file(tablename: str, quotechar: str = "'") -> pd.DataFrame:
-    z = get_lahman_zip()
-    f = f'{base_string}/{tablename}'
+# NB: the response is cached for 30 days unless force is True
+def _get_response(force: bool = False) -> requests.Response:
+    session = _get_session()
+    response = session.get("http://seanlahman.com", refresh=force)
+    return response
+
+# For example, "https://www.dropbox.com/scl/fi/hy0sxw6gaai7ghemrshi8/lahman_1871-2023_csv.7z?rlkey=edw1u63zzxg48gvpcmr3qpnhz&dl=1"
+def _get_download_url(force: bool = False) -> str:
+    response = _get_response(force)
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    anchor = soup.find("a", string="Comma-delimited version")
+    url = anchor["href"].replace("dl=0", "dl=1")
+
+    return url
+
+def _get_cache_dir() -> str:
+    return f"{cache.config.cache_directory}/lahman"
+
+def _get_session() -> CachedSession:
+    return CachedSession(_get_cache_dir(), expire_after=timedelta(days=30))
+
+def _get_base_string() -> str:
+    # e.g. "lahman_1871-2023_csv"; Path is used only to strip the .7z suffix
+    url = _get_download_url()
+
+    return Path(url).stem
+
+def _get_file_path(filename: str = "") -> str:
+    base_string = _get_base_string()
+    return path.join(_get_cache_dir(), base_string, filename)
+
+def _get_table(filename: str,
+               quotechar: str = "'",
+               encoding=None,
+               on_bad_lines="error") -> pd.DataFrame:
+    filepath = _get_file_path(filename)
     data = pd.read_csv(
-        f"{path.join(cache.config.cache_directory, f)}" if z is None else z.open(f),
+        filepath,
         header=0,
-        sep=',',
-        quotechar=quotechar
+        sep=",",
+        quotechar=quotechar,
+        encoding=encoding,
+        on_bad_lines=on_bad_lines,
     )
     return data
 
+# Returns True if a download happened, False if the cached copy was used
+def download_lahman(force: bool = False) -> bool:
+    if force or not path.exists(_get_file_path()):
+        cache_dir = _get_cache_dir()
+        base_string = _get_base_string()
+        makedirs(f"{cache_dir}/{base_string}", exist_ok=True)
 
-# do this for every table in the lahman db so they can exist as separate functions
-def parks() -> pd.DataFrame:
-    return _get_file('core/Parks.csv')
+        url = _get_download_url(force)
+        stream = requests.get(url, stream=True)
+        with SevenZipFile(BytesIO(stream.content)) as archive:
+            archive.extractall(cache_dir)
+        return True
+    return False
 
+# do this for every table in the lahman db so they can exist as separate functions
 def all_star_full() -> pd.DataFrame:
-    return _get_file("core/AllstarFull.csv")
+    return _get_table("AllstarFull.csv")
 
 def appearances() -> pd.DataFrame:
-    return _get_file("core/Appearances.csv")
+    return _get_table("Appearances.csv")
 
 def awards_managers() -> pd.DataFrame:
-    return _get_file("contrib/AwardsManagers.csv")
+    return _get_table("AwardsManagers.csv")
 
 def awards_players() -> pd.DataFrame:
-    return _get_file("contrib/AwardsPlayers.csv")
+    return _get_table("AwardsPlayers.csv")
 
 def awards_share_managers() -> pd.DataFrame:
-    return _get_file("contrib/AwardsShareManagers.csv")
+    return _get_table("AwardsShareManagers.csv")
 
 def awards_share_players() -> pd.DataFrame:
-    return _get_file("contrib/AwardsSharePlayers.csv")
+    return _get_table("AwardsSharePlayers.csv")
 
 def batting() -> pd.DataFrame:
-    return _get_file("core/Batting.csv")
+    return _get_table("Batting.csv")
 
 def batting_post() -> pd.DataFrame:
-    return _get_file("core/BattingPost.csv")
+    return _get_table("BattingPost.csv")
 
 def college_playing() -> pd.DataFrame:
-    return _get_file("contrib/CollegePlaying.csv")
+    return _get_table("CollegePlaying.csv")
 
 def fielding() -> pd.DataFrame:
-    return _get_file("core/Fielding.csv")
+    return _get_table("Fielding.csv")
 
 def fielding_of() -> pd.DataFrame:
-    return _get_file("core/FieldingOF.csv")
+    return _get_table("FieldingOF.csv")
 
 def fielding_of_split() -> pd.DataFrame:
-    return _get_file("core/FieldingOFsplit.csv")
+    return _get_table("FieldingOFsplit.csv")
 
 def fielding_post() -> pd.DataFrame:
-    return _get_file("core/FieldingPost.csv")
+    return _get_table("FieldingPost.csv")
 
 def hall_of_fame() -> pd.DataFrame:
-    return _get_file("contrib/HallOfFame.csv")
+    return _get_table("HallOfFame.csv")
 
 def home_games() -> pd.DataFrame:
-    return _get_file("core/HomeGames.csv")
+    return _get_table("HomeGames.csv")
 
 def managers() -> pd.DataFrame:
-    return _get_file("core/Managers.csv")
+    return _get_table("Managers.csv")
 
 def managers_half() -> pd.DataFrame:
-    return _get_file("core/ManagersHalf.csv")
+    return _get_table("ManagersHalf.csv")
 
 def master() -> pd.DataFrame:
     # Alias for people -- the new name for master
     return people()
 
+def parks() -> pd.DataFrame:
+    return _get_table("Parks.csv", encoding="unicode_escape")
+
 def people() -> pd.DataFrame:
-    return _get_file("core/People.csv")
+    return _get_table("People.csv", encoding="unicode_escape")
 
 def pitching() -> pd.DataFrame:
-    return _get_file("core/Pitching.csv")
+    return _get_table("Pitching.csv")
 
 def pitching_post() -> pd.DataFrame:
-    return _get_file("core/PitchingPost.csv")
+    return _get_table("PitchingPost.csv")
 
 def salaries() -> pd.DataFrame:
-    return _get_file("contrib/Salaries.csv")
+    return _get_table("Salaries.csv")
 
 def schools() -> pd.DataFrame:
-    return _get_file("contrib/Schools.csv", quotechar='"') # different here bc of doublequotes used in some school names
+    # NB: one line is bad; "brklyncuny" should use double quotes, but doesn't
+    return _get_table("Schools.csv", quotechar='"', on_bad_lines="skip")
 
 def series_post() -> pd.DataFrame:
-    return _get_file("core/SeriesPost.csv")
+    return _get_table("SeriesPost.csv")
 
 def teams_core() -> pd.DataFrame:
-    return _get_file("core/Teams.csv")
-
-def teams_upstream() -> pd.DataFrame:
-    return _get_file("upstream/Teams.csv") # manually maintained file
+    return _get_table("Teams.csv")
 
 def teams_franchises() -> pd.DataFrame:
-    return _get_file("core/TeamsFranchises.csv")
+    return _get_table("TeamsFranchises.csv")
 
 def teams_half() -> pd.DataFrame:
-    return _get_file("core/TeamsHalf.csv")
+    return _get_table("TeamsHalf.csv")
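The rewritten module now has two cache layers: requests_cache keeps the scraped seanlahman.com page for 30 days, and the extracted CSVs persist under the pybaseball cache directory. A hedged sketch of the observable behavior, assuming a fresh cache and network access:

```python
# Hedged sketch of the two-layer caching behavior; assumes a fresh cache and network access.
from pybaseball.lahman import download_lahman, batting

first = download_lahman()             # True: page scraped, 7z downloaded and extracted
second = download_lahman()            # False: extracted CSVs already present, nothing fetched
forced = download_lahman(force=True)  # True: refreshes the page cache and re-downloads

print(batting()["yearID"].max())      # e.g. 2023 for the lahman_1871-2023 release
```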
_get_table("AwardsShareManagers.csv") def awards_share_players() -> pd.DataFrame: - return _get_file("contrib/AwardsSharePlayers.csv") + return _get_table("AwardsSharePlayers.csv") def batting() -> pd.DataFrame: - return _get_file("core/Batting.csv") + return _get_table("Batting.csv") def batting_post() -> pd.DataFrame: - return _get_file("core/BattingPost.csv") + return _get_table("BattingPost.csv") def college_playing() -> pd.DataFrame: - return _get_file("contrib/CollegePlaying.csv") + return _get_table("CollegePlaying.csv") def fielding() -> pd.DataFrame: - return _get_file("core/Fielding.csv") + return _get_table("Fielding.csv") def fielding_of() -> pd.DataFrame: - return _get_file("core/FieldingOF.csv") + return _get_table("FieldingOF.csv") def fielding_of_split() -> pd.DataFrame: - return _get_file("core/FieldingOFsplit.csv") + return _get_table("FieldingOFsplit.csv") def fielding_post() -> pd.DataFrame: - return _get_file("core/FieldingPost.csv") + return _get_table("FieldingPost.csv") def hall_of_fame() -> pd.DataFrame: - return _get_file("contrib/HallOfFame.csv") + return _get_table("HallOfFame.csv") def home_games() -> pd.DataFrame: - return _get_file("core/HomeGames.csv") + return _get_table("HomeGames.csv") def managers() -> pd.DataFrame: - return _get_file("core/Managers.csv") + return _get_table("Managers.csv") def managers_half() -> pd.DataFrame: - return _get_file("core/ManagersHalf.csv") + return _get_table("ManagersHalf.csv") def master() -> pd.DataFrame: # Alias for people -- the new name for master return people() +def parks() -> pd.DataFrame: + return _get_table("Parks.csv", encoding="unicode_escape") + def people() -> pd.DataFrame: - return _get_file("core/People.csv") + return _get_table("People.csv", encoding="unicode_escape") def pitching() -> pd.DataFrame: - return _get_file("core/Pitching.csv") + return _get_table("Pitching.csv") def pitching_post() -> pd.DataFrame: - return _get_file("core/PitchingPost.csv") + return _get_table("PitchingPost.csv") def salaries() -> pd.DataFrame: - return _get_file("contrib/Salaries.csv") + return _get_table("Salaries.csv") def schools() -> pd.DataFrame: - return _get_file("contrib/Schools.csv", quotechar='"') # different here bc of doublequotes used in some school names + # NB: one line is bad; "brklyncuny" should use double quotes, but doesn't + return _get_table("Schools.csv", quotechar='"', on_bad_lines="skip") def series_post() -> pd.DataFrame: - return _get_file("core/SeriesPost.csv") + return _get_table("SeriesPost.csv") def teams_core() -> pd.DataFrame: - return _get_file("core/Teams.csv") - -def teams_upstream() -> pd.DataFrame: - return _get_file("upstream/Teams.csv") # manually maintained file + return _get_table("Teams.csv") def teams_franchises() -> pd.DataFrame: - return _get_file("core/TeamsFranchises.csv") + return _get_table("TeamsFranchises.csv") def teams_half() -> pd.DataFrame: - return _get_file("core/TeamsHalf.csv") + return _get_table("TeamsHalf.csv") diff --git a/setup.py b/setup.py index 2d76831a..d1aacf1d 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,8 @@ 'matplotlib>=2.0.0', 'tqdm>=4.50.0', 'attrs>=20.3.0', + 'py7zr>=0.22.0', + 'requests_cache>=1.2.1', ], # List additional groups of dependencies here (e.g. 
diff --git a/tests/pybaseball/conftest.py b/tests/pybaseball/conftest.py
index 6c9a845a..ea8d66d7 100644
--- a/tests/pybaseball/conftest.py
+++ b/tests/pybaseball/conftest.py
@@ -131,83 +131,70 @@ def get_contents(filename: str) -> str:
 
     return get_contents
 
-
 @pytest.fixture()
-def get_data_file_dataframe(data_dir: str) -> GetDataFrameCallable:
+def get_data_file_bytes(data_dir: str) -> Callable[[str], bytes]:
     """
-    Returns a function that will allow getting a dataframe from a csv file in the tests data directory easily
+    Returns a function that will allow getting the contents of a file in the tests data directory easily
     """
-    def get_dataframe(filename: str, parse_dates: _ParseDates = False) -> pd.DataFrame:
+    def get_bytes(filename: str) -> bytes:
         """
-        Get the DatFrame representation of the contents of a csv file in the tests data directory
+        Get the byte contents of a file in the tests data directory
 
         ARGUMENTS:
-        filename : str : the name of the file within the tests data directory to load into a DataFrame
+        filename : str : the name of the file within the tests data directory to get the contents of
         """
-        return pd.read_csv(os.path.join(data_dir, filename), index_col=0, parse_dates=parse_dates).reset_index(drop=True).convert_dtypes(convert_string=False)
-
-    return get_dataframe
+        with open(os.path.join(data_dir, filename), 'rb') as _file:
+            data = _file.read()
+        return data
+    return get_bytes
 
 @pytest.fixture()
-def response_get_monkeypatch(monkeypatch: MonkeyPatch) -> Callable:
+def get_data_file_dataframe(data_dir: str) -> GetDataFrameCallable:
     """
-    Returns a function that will monkeypatch the requests.get function call to return expected data
+    Returns a function that will allow getting a dataframe from a csv file in the tests data directory easily
     """
-    def setup(result: Union[str, bytes], expected_url: Optional[str] = None) -> None:
+    def get_dataframe(filename: str, parse_dates: _ParseDates = False) -> pd.DataFrame:
         """
-        Get the DatFrame representation of the contents of a csv file in the tests data directory
+        Get the DataFrame representation of the contents of a csv file in the tests data directory
 
         ARGUMENTS:
-        result : str : the payload to return in the contents of the request.get call
-        expected_url : str (optional) : an expected_url to test the get call against
-            to ensure the correct endpoint is hit
+        filename : str : the name of the file within the tests data directory to load into a DataFrame
         """
-        def _monkeypatch(url: str, params: Optional[Dict] = None, timeout: Optional[int] = None) -> object:
-            final_url = url
-
-            if params:
-                query_params = urllib.parse.urlencode(params, safe=',')
-                final_url = f"{final_url}?{query_params}"
-
-            if expected_url is not None:
-                # These prints are desired as these are long and get cut off in the test outpute.
-                # These will only render on failed tests, so only when you would want to see them anyway.
-                print("expected", expected_url)
-                print("received", final_url)
-                assert final_url.endswith(expected_url)
-
-            class DummyResponse:
-                def __init__(self, content: Union[str, bytes]):
-                    self.content = content
-                    self.text = content
-                    self.status_code = 200
-                    self.url = final_url
+        return pd.read_csv(os.path.join(data_dir, filename), index_col=0, parse_dates=parse_dates).reset_index(drop=True).convert_dtypes(convert_string=False)
 
-            return DummyResponse(result)
+    return get_dataframe
 
-        monkeypatch.setattr(requests, 'get', _monkeypatch)
-        return setup
+@pytest.fixture()
+def response_get_monkeypatch(monkeypatch: MonkeyPatch) -> Callable:
+    return _get_monkeypatch(monkeypatch, requests)
 
 @pytest.fixture()
 def bref_get_monkeypatch(monkeypatch: MonkeyPatch) -> Callable:
+    return _get_monkeypatch(monkeypatch, BRefSession())
+
+@pytest.fixture()
+def target_get_monkeypatch(monkeypatch: MonkeyPatch, target: str | object) -> Callable:
+    return _get_monkeypatch(monkeypatch, target)
+
+def _get_monkeypatch(monkeypatch: MonkeyPatch, target: str | object) -> Callable:
     """
-    Returns a function that will monkeypatch the BRefSession.get function call to return expected data
+    Returns a function that will monkeypatch the input target's get() function call to return the supplied result.
     """
     def setup(result: Union[str, bytes], expected_url: Optional[str] = None) -> None:
         """
-        Get the DatFrame representation of the contents of a csv file in the tests data directory
+        Get the result when calling the get() function
 
         ARGUMENTS:
-        result : str : the payload to return in the contents of the request.get call
+        result : str | bytes : the payload to return in the contents of the request.get call
         expected_url : str (optional) : an expected_url to test the get call against
             to ensure the correct endpoint is hit
         """
-        def _monkeypatch(url: str, params: Optional[Dict] = None, timeout: Optional[int] = None) -> object:
+        def _monkeypatch(url: str, params: Optional[Dict] = None, stream: bool = False, timeout: Optional[int] = None) -> object:
             final_url = url
 
             if params:
@@ -230,6 +217,6 @@ def __init__(self, content: Union[str, bytes]):
 
         return DummyResponse(result)
 
-    monkeypatch.setattr(BRefSession(), 'get', _monkeypatch)
+    monkeypatch.setattr(target, 'get', _monkeypatch)
 
     return setup
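With the generalized `_get_monkeypatch` helper, a test module only has to override the `target` fixture to choose which object's `get()` gets patched. A condensed sketch of the wiring that `tests/pybaseball/test_lahman.py` (below) uses:

```python
# Condensed sketch of how a test module wires target_get_monkeypatch
# (mirrors tests/pybaseball/test_lahman.py below).
import pytest

from pybaseball.lahman import _get_session

@pytest.fixture(name="target")
def _target():
    return _get_session()  # the CachedSession whose .get() will be patched

def test_scrape(target_get_monkeypatch, sample_html):
    target_get_monkeypatch(sample_html)  # session.get() now returns the canned page
```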
- print("expected", expected_url) - print("received", final_url) - assert final_url.endswith(expected_url) - - class DummyResponse: - def __init__(self, content: Union[str, bytes]): - self.content = content - self.text = content - self.status_code = 200 - self.url = final_url + return pd.read_csv(os.path.join(data_dir, filename), index_col=0, parse_dates=parse_dates).reset_index(drop=True).convert_dtypes(convert_string=False) - return DummyResponse(result) + return get_dataframe - monkeypatch.setattr(requests, 'get', _monkeypatch) - return setup +@pytest.fixture() +def response_get_monkeypatch(monkeypatch: MonkeyPatch) -> Callable: + return _get_monkeypatch(monkeypatch, requests) @pytest.fixture() def bref_get_monkeypatch(monkeypatch: MonkeyPatch) -> Callable: + return _get_monkeypatch(monkeypatch, BRefSession()) + +@pytest.fixture() +def target_get_monkeypatch(monkeypatch: MonkeyPatch, target: str | object) -> Callable: + return _get_monkeypatch(monkeypatch, target) + +def _get_monkeypatch(monkeypatch: MonkeyPatch, target: str | object) -> Callable: """ - Returns a function that will monkeypatch the BRefSession.get function call to return expected data + Returns a function that will monkeypatch the input target's get() function call to return supplied result. """ def setup(result: Union[str, bytes], expected_url: Optional[str] = None) -> None: """ - Get the DatFrame representation of the contents of a csv file in the tests data directory + Get the result when calling the get() function ARGUMENTS: - result : str : the payload to return in the contents of the request.get call + result : str | bytes : the payload to return in the contents of the request.get call expected_url : str (optional) : an expected_url to test the get call against to ensure the correct endpoint is hit """ - def _monkeypatch(url: str, params: Optional[Dict] = None, timeout: Optional[int] = None) -> object: + def _monkeypatch(url: str, params: Optional[Dict] = None, stream = False, timeout: Optional[int] = None) -> object: final_url = url if params: @@ -230,6 +217,6 @@ def __init__(self, content: Union[str, bytes]): return DummyResponse(result) - monkeypatch.setattr(BRefSession(), 'get', _monkeypatch) + monkeypatch.setattr(target, 'get', _monkeypatch) return setup diff --git a/tests/pybaseball/data/lahman.html b/tests/pybaseball/data/lahman.html new file mode 100644 index 00000000..adab564d --- /dev/null +++ b/tests/pybaseball/data/lahman.html @@ -0,0 +1,26 @@ + +SeanLahman.com
+
+Lahman Baseball Database
+Updated version of the Lahman Database is now available.  Follow me on Twitter to be notified of updates.
+Hats off to Bryan Walko for his efforts to produce this annual update.
+
+Download latest version (stats from 1871-2023 seasons)
+
+Files are compressed using 7-zip.  Click here for free download.
+
+The updated version of the database contains complete batting and pitching statistics from 1871 to 2023, plus fielding statistics, standings, team stats, managerial records, post-season data, and more. For more details on the latest release, please read the documentation.
+
+The database can be used on any platform, but please be aware that this is not a standalone application. It is a database that requires Microsoft Access or some other relational database software to be useful.
+
+Journalism
+From 2010-2023, I was a watchdog reporter for the Rochester Democrat and Chronicle, part of the USA Today Network. I wrote about public safety trends and the impact of technology in Rochester and for other Gannett papers across New York state, and participate in national projects with the USA Today investigations team.  Prior to that I was a sports reporter and columnist with the New York Sun.
+Other data projects
\ No newline at end of file
diff --git a/tests/pybaseball/data/lahman_1871-2023_csv.7z b/tests/pybaseball/data/lahman_1871-2023_csv.7z
new file mode 100644
index 00000000..64406750
Binary files /dev/null and b/tests/pybaseball/data/lahman_1871-2023_csv.7z differ
diff --git a/tests/pybaseball/test_lahman.py b/tests/pybaseball/test_lahman.py
new file mode 100644
index 00000000..dc6541d9
--- /dev/null
+++ b/tests/pybaseball/test_lahman.py
@@ -0,0 +1,113 @@
+import tempfile
+import time
+from typing import Callable
+
+import pytest
+from requests_cache import CachedSession
+
+from pybaseball.lahman import *
+from pybaseball.lahman import _get_base_string, _get_download_url, _get_response, _get_session
+
+
+@pytest.fixture(name="sample_html")
+def _sample_html(get_data_file_contents: Callable[[str], str]) -> str:
+    return get_data_file_contents('lahman.html')
+
+@pytest.fixture(name="sample_bytes")
+def _sample_bytes(get_data_file_bytes: Callable[[str], bytes]) -> bytes:
+    return get_data_file_bytes('lahman_1871-2023_csv.7z')
+
+@pytest.fixture(name="target")
+def _target() -> CachedSession:
+    return _get_session()
+
+@pytest.fixture(autouse=True)
+def run_around_tests():
+    # setup: point the pybaseball cache at a throwaway directory
+    tempdir = tempfile.TemporaryDirectory()
+    cache.config.cache_directory = tempdir.name
+    yield
+    # teardown: nothing to do; the TemporaryDirectory cleans itself up
+
+def test_get_lahman_info(target_get_monkeypatch: Callable, sample_html: str):
+    target_get_monkeypatch(sample_html)
+
+    url = _get_download_url()
+    base_string = _get_base_string()
+
+    assert url == "https://www.dropbox.com/scl/fi/hy0sxw6gaai7ghemrshi8/lahman_1871-2023_csv.7z?rlkey=edw1u63zzxg48gvpcmr3qpnhz&dl=1"
+    assert base_string == "lahman_1871-2023_csv"
+
+def test_download_lahman(target_get_monkeypatch: Callable, sample_html: str,
+                         response_get_monkeypatch: Callable, sample_bytes: bytes):
+    target_get_monkeypatch(sample_html)
+    response_get_monkeypatch(sample_bytes)
+
+    # test download
+    b1 = download_lahman()
+    r1 = _get_response()
+    assert b1
+
+    # test download - no force
+    b2 = download_lahman()
+    r2 = _get_response()
+    assert not b2
+    assert r2.created_at == r1.created_at
+    assert r2.expires == r1.expires
+
+    # test download - with force
+    time.sleep(1.1)
+    b3 = download_lahman(force=True)
+    r3 = _get_response(force=True)
+    assert b3
+    assert r3.created_at == r1.created_at
+    assert r3.expires > r1.expires
+
+def test_lahman_tables(target_get_monkeypatch: Callable, sample_html: str,
+                       response_get_monkeypatch: Callable, sample_bytes: bytes):
+    target_get_monkeypatch(sample_html)
+    response_get_monkeypatch(sample_bytes)
+
+    download_lahman()
+
+    # test tables
+    assert not all_star_full().empty
+    assert not appearances().empty
+    assert not awards_managers().empty
+    assert not awards_players().empty
+    assert not awards_share_managers().empty
+    assert not awards_share_players().empty
+    assert not batting().empty
+    assert not batting_post().empty
+    assert not college_playing().empty
+    assert not fielding().empty
+    assert not fielding_of().empty
+    assert not fielding_of_split().empty
+    assert not fielding_post().empty
+    assert not hall_of_fame().empty
+    assert not home_games().empty
+    assert not managers().empty
+    assert not managers_half().empty
+    assert not master().empty
+    assert not parks().empty
+    assert not people().empty
+    assert not pitching().empty
+    assert not pitching_post().empty
+    assert not salaries().empty
+    assert not schools().empty
+    assert not series_post().empty
+    assert not teams_core().empty
+    assert not teams_franchises().empty
+    assert not teams_half().empty
+
+def test_lahman_schools(target_get_monkeypatch: Callable, sample_html: str,
+                        response_get_monkeypatch: Callable, sample_bytes: bytes):
+    target_get_monkeypatch(sample_html)
+    response_get_monkeypatch(sample_bytes)
+
+    download_lahman()
+
+    table = schools()
+    row = table.loc[table['schoolID'] == "ksstmaC"].iloc[0]
+    name = row['name_full']
+    assert name == "St. Mary's College"
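A note on the `schools()` change above: `Schools.csv` ships one row ("brklyncuny") whose comma-containing school name is not double-quoted, so `on_bad_lines="skip"` drops that row instead of failing the whole read. A small illustration, using a hypothetical malformed row of the same shape:

```python
# Why schools() needs on_bad_lines="skip": a row with an unquoted comma
# produces more fields than the header declares. The row below is illustrative.
import io
import pandas as pd

csv = (
    'schoolID,name_full\n'
    '"ksstmaC","St. Mary\'s College"\n'
    'brklyncuny,Brooklyn College, CUNY\n'  # 3 fields vs. 2 columns -> bad line
)
df = pd.read_csv(io.StringIO(csv), quotechar='"', on_bad_lines="skip")
print(df)  # only the well-formed ksstmaC row survives
```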