Reuse chunks from the same download if duplicated #11

Open

wants to merge 2 commits into master
4 changes: 4 additions & 0 deletions README.md
@@ -459,6 +459,8 @@ optional arguments:
--exclude <prefix> Exclude files starting with <prefix> (case
insensitive)
--install-tag <tag> Only download files with the specified install tag
--read-files Read duplicated parts from already saved files, do not
keep them in RAM
--enable-reordering Enable reordering optimization to reduce RAM
requirements during download (may have adverse results
for some titles)
@@ -670,6 +672,8 @@ log_level = debug
max_memory = 2048
; maximum number of worker processes when downloading (fewer workers will be slower, but also use less system resources)
max_workers = 8
; Enables reading duplicated data from files during download (decreases RAM usage but increases disk I/O)
read_files = false
; default install directory
install_dir = /mnt/tank/games
; locale override, must be in RFC 1766 format (e.g. "en-US")
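For reference, the option documented above can be enabled per install by passing `--read-files` to `legendary install`, or persistently by setting `read_files = true` in the `[Legendary]` section of the config file (the section name matches the `config.getboolean('Legendary', 'read_files', ...)` lookup added in `legendary/core.py` below).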
3 changes: 3 additions & 0 deletions legendary/cli.py
@@ -971,6 +971,7 @@ def install_game(self, args):
file_prefix_filter=args.file_prefix,
file_exclude_filter=args.file_exclude_prefix,
file_install_tag=args.install_tag,
read_files=args.read_files,
dl_optimizations=args.order_opt,
dl_timeout=args.dl_timeout,
repair=args.repair_mode,
@@ -2768,6 +2769,8 @@ def main():
type=str, help='Exclude files starting with <prefix> (case insensitive)')
install_parser.add_argument('--install-tag', dest='install_tag', action='append', metavar='<tag>',
type=str, help='Only download files with the specified install tag')
install_parser.add_argument('--read-files', dest='read_files', action='store_true',
help='Read duplicated parts from already saved files, do not keep them in memory')
install_parser.add_argument('--enable-reordering', dest='order_opt', action='store_true',
help='Enable reordering optimization to reduce RAM requirements '
'during download (may have adverse results for some titles)')
30 changes: 24 additions & 6 deletions legendary/core.py
@@ -1327,6 +1327,7 @@ def prepare_download(self, game: Game, base_game: Game = None, base_path: str =
override_old_manifest: str = '', override_base_url: str = '',
platform: str = 'Windows', file_prefix_filter: list = None,
file_exclude_filter: list = None, file_install_tag: list = None,
read_files: bool = False,
dl_optimizations: bool = False, dl_timeout: int = 10,
repair: bool = False, repair_use_latest: bool = False,
disable_delta: bool = False, override_delta_manifest: str = '',
@@ -1487,6 +1488,9 @@ def prepare_download(self, game: Game, base_game: Game = None, base_path: str =
if not max_shm:
max_shm = self.lgd.config.getint('Legendary', 'max_memory', fallback=2048)

if not read_files:
read_files = self.lgd.config.getboolean('Legendary', 'read_files', fallback=False)

if dl_optimizations or is_opt_enabled(game.app_name, new_manifest.meta.build_version):
self.log.info('Download order optimizations are enabled.')
process_opt = True
@@ -1499,12 +1503,26 @@ def prepare_download(self, game: Game, base_game: Game = None, base_path: str =
dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q,
max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers,
dl_timeout=dl_timeout, bind_ip=bind_ip)
anlres = dlm.run_analysis(manifest=new_manifest, old_manifest=old_manifest,
patch=not disable_patching, resume=not force,
file_prefix_filter=file_prefix_filter,
file_exclude_filter=file_exclude_filter,
file_install_tag=file_install_tag,
processing_optimization=process_opt)

analysis_kwargs = dict(
old_manifest=old_manifest,
patch=not disable_patching, resume=not force,
file_prefix_filter=file_prefix_filter,
file_exclude_filter=file_exclude_filter,
file_install_tag=file_install_tag,
processing_optimization=process_opt
)

try:
anlres = dlm.run_analysis(manifest=new_manifest, **analysis_kwargs, read_files=read_files)
except MemoryError:
if read_files:
raise
self.log.warning('Memory error encountered, retrying with file read enabled...')
dlm = DLManager(install_path, base_url, resume_file=resume_file, status_q=status_q,
max_shared_memory=max_shm * 1024 * 1024, max_workers=max_workers,
dl_timeout=dl_timeout, bind_ip=bind_ip)
anlres = dlm.run_analysis(manifest=new_manifest, **analysis_kwargs, read_files=True)

prereq = None
if new_manifest.meta.prereq_ids:
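To make the precedence in the hunk above explicit: the CLI flag can only switch the behaviour on, and the config value is consulted only when the flag is absent. A minimal standalone sketch of that fallback (the ConfigParser setup and function name here are illustrative, not part of the patch):

```python
from configparser import ConfigParser

# illustrative stand-in for the [Legendary] section of legendary's config file
config = ConfigParser()
config.read_string('[Legendary]\nread_files = true\n')

def effective_read_files(cli_flag: bool) -> bool:
    # an explicit --read-files flag (argparse store_true) always wins
    if cli_flag:
        return True
    # otherwise fall back to the config value, defaulting to off
    return config.getboolean('Legendary', 'read_files', fallback=False)

print(effective_read_files(True))   # True, forced by the CLI flag
print(effective_read_files(False))  # True, taken from the config section above
```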
34 changes: 32 additions & 2 deletions legendary/downloader/mp/manager.py
@@ -82,6 +82,7 @@ def __init__(self, download_dir, base_url, cache_dir=None, status_q=None,
def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
patch=True, resume=True, file_prefix_filter=None,
file_exclude_filter=None, file_install_tag=None,
read_files=False,
processing_optimization=False) -> AnalysisResult:
"""
Run analysis on manifest and old manifest (if not None) and return a result
@@ -94,6 +95,7 @@ def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
:param file_prefix_filter: Only download files that start with this prefix
:param file_exclude_filter: Exclude files with this prefix from download
:param file_install_tag: Only install files with the specified tag
:param read_files: Allow reading from already finished files
:param processing_optimization: Attempt to optimize processing order and RAM usage
:return: AnalysisResult
"""
@@ -318,6 +320,30 @@ def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
analysis_res.reuse_size += cp.size
break

# determine whether a chunk part is currently in written files
reusable_written = defaultdict(dict)
if read_files:
self.log.debug('Analyzing manifest for re-usable chunks in saved files...')
cur_written_cps = defaultdict(list)
for cur_file in fmlist:
cur_file_cps = dict()
cur_file_offset = 0
for cp in cur_file.chunk_parts:
key = (cp.guid_num, cp.offset, cp.size)
for wr_file_name, wr_file_offset, wr_cp_offset, wr_cp_end_offset in cur_written_cps[cp.guid_num]:
# check if new chunk part is wholly contained in a written chunk part
cur_cp_end_offset = cp.offset + cp.size
if wr_cp_offset <= cp.offset and wr_cp_end_offset >= cur_cp_end_offset:
references[cp.guid_num] -= 1
reuse_offset = wr_file_offset + (cp.offset - wr_cp_offset)
reusable_written[cur_file.filename][key] = (wr_file_name, reuse_offset)
break
cur_file_cps[cp.guid_num] = (cur_file.filename, cur_file_offset, cp.offset, cp.offset + cp.size)
cur_file_offset += cp.size

for guid, value in cur_file_cps.items():
cur_written_cps[guid].append(value)

last_cache_size = current_cache_size = 0
# set to determine whether a file is currently cached or not
cached = set()
@@ -338,17 +364,21 @@ def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
continue

existing_chunks = re_usable.get(current_file.filename, None)
written_chunks = reusable_written.get(current_file.filename, None)
chunk_tasks = []
reused = 0

for cp in current_file.chunk_parts:
ct = ChunkTask(cp.guid_num, cp.offset, cp.size)

# re-use the chunk from the existing file if we can
if existing_chunks and (cp.guid_num, cp.offset, cp.size) in existing_chunks:
key = (cp.guid_num, cp.offset, cp.size)
if existing_chunks and key in existing_chunks:
reused += 1
ct.chunk_file = current_file.filename
ct.chunk_offset = existing_chunks[(cp.guid_num, cp.offset, cp.size)]
ct.chunk_offset = existing_chunks[key]
elif written_chunks and key in written_chunks:
ct.chunk_file, ct.chunk_offset = written_chunks[key]
else:
# add to DL list if not already in it
if cp.guid_num not in chunks_in_dl_list:
Expand Down
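The key piece of the new analysis pass is the containment check and offset arithmetic that map a needed chunk part onto data already written to disk. A self-contained sketch of that calculation (function and variable names are illustrative, not taken from the patch):

```python
from typing import List, Optional, Tuple

# (file name, offset of the written part within that file,
#  start offset of the part within its chunk, end offset within the chunk),
# mirroring the tuples the analysis above records per chunk GUID
WrittenPart = Tuple[str, int, int, int]

def find_reuse_location(cp_offset: int, cp_size: int,
                        written_parts: List[WrittenPart]) -> Optional[Tuple[str, int]]:
    """Return (file name, file offset) if the requested chunk part is wholly
    contained in a part of the same chunk that was already written to disk."""
    cp_end = cp_offset + cp_size
    for file_name, file_offset, part_start, part_end in written_parts:
        # the needed part must lie completely inside the written part
        if part_start <= cp_offset and part_end >= cp_end:
            # skip forward by how far into the written part the needed part begins
            return file_name, file_offset + (cp_offset - part_start)
    return None

# Example: chunk bytes 0..8192 were written to "a.bin" starting at file offset 4096.
# A later file needs bytes 1024..3072 of the same chunk; they can be read back from
# "a.bin" at offset 4096 + 1024 = 5120 instead of being kept in shared memory.
print(find_reuse_location(1024, 2048, [('a.bin', 4096, 0, 8192)]))  # ('a.bin', 5120)
```

In the patch itself a successful match also decrements `references[cp.guid_num]`, since that occurrence no longer has to be satisfied from the in-memory chunk cache.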