-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
168 lines (141 loc) · 6.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import logging
import os
import shutil
import sys
from datetime import datetime, timedelta
from urllib.parse import urlparse
import pandas as pd
import pytz
import validators
from webdav4.client import Client
from exceptions import TazConfigurationError, TazDownloadError, TazDownloadFormatException
from models import TazDownloader, TazConfiguration
# Directory that contains this script (symlinks resolved). It is the base path
# for the tmp folder, the download history CSV and the log file.
dir_path = os.path.split(os.path.realpath(__file__))[0]
def main(config: dict) -> None:
    """Download all newly available newspaper issues and deliver them.

    Steps, in order:
      1. Set the root logger level from ``config['log_level']``.
      2. Load ``download_history.csv`` (next to this script) or start an empty history.
      3. If ``config['limit_requests']`` is truthy and the history already contains a
         file name with tomorrow's date (Europe/Berlin, ``%Y_%m_%d``), exit with code 0.
      4. Scrape the available issues, drop history rows for issues that are no longer
         offered and download every issue not yet in the history into ``tmp``.
      5. Persist the updated history.
      6. Upload the downloaded files to the Nextcloud WebDAV share given by
         ``config['nextcloud_webdav_url']`` or, if no URL is configured, move them
         to ``config['download_folder']``.

    Args:
        config: Configuration mapping. Keys read here: ``log_level``,
            ``limit_requests``, ``id``, ``password``, ``download_format``,
            ``nextcloud_webdav_url``, ``nextcloud_webdav_password`` (only when a valid
            WebDAV URL is set) and ``download_folder``.

    Raises:
        SystemExit: With code 0 when tomorrow's issue was already downloaded; with
            code 1 when the downloader cannot be created, scraping fails, the WebDAV
            URL is invalid, or neither a WebDAV URL nor a download folder is given.
    """
    # Get german date for tomorrow
    berlin_now = datetime.now(pytz.timezone('Europe/Berlin'))
    tomorrow = (berlin_now + timedelta(1)).strftime('%Y_%m_%d')

    # Define tmp folder and history file
    tmp_folder = os.path.join(dir_path, 'tmp')
    history_path = os.path.join(dir_path, 'download_history.csv')

    # Set log level; a missing or non-string level must not abort the whole run
    try:
        logging.getLogger().setLevel(config['log_level'].upper())
    except (AttributeError, TypeError, ValueError) as e:
        logging.error(f"Could not set log level.\n {e}")

    # Read download history from csv file
    try:
        df = pd.read_csv(history_path, header=0)
    except FileNotFoundError:
        # In case, there isn't yet a csv file, create data frame with headers
        df = pd.DataFrame(columns=['file', 'download_timestamp'])

    # If the 'limit_requests' argument is specified, check whether tomorrow's newspaper
    # has already been downloaded
    if config['limit_requests']:
        try:
            # na=False: a NaN file name must not count as a match (NaN is truthy);
            # regex=False: the date is a literal substring, not a pattern.
            already_downloaded = bool(
                df['file'].str.contains(tomorrow, regex=False, na=False).any()
            )
        except Exception as e:
            logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n {e}")
            already_downloaded = False
        if already_downloaded:
            logging.info("Tomorrow's newspaper was already downloaded. Execution canceled.")
            sys.exit(0)

    # Instantiate downloader object
    try:
        taz_dl = TazDownloader(config['id'], config['password'], config['download_format'])
    except TazDownloadFormatException as e:
        logging.error(e)
        sys.exit(1)

    try:
        # Get newspaper available for download
        newspaper_available = taz_dl.scrape_newspaper()
        # Remove outdated newspaper from download_history.csv
        # (assumes scrape_newspaper() returns an iterable of file names - TODO confirm)
        df = df[df['file'].isin(list(newspaper_available))]
        # Find newspaper which are not already downloaded
        known_files = set(df['file'].values)
        newspaper_to_download = [n for n in newspaper_available if n not in known_files]
    except TazDownloadError as e:
        logging.error(e)
        sys.exit(1)

    # Download newspaper
    newspaper_downloaded = []
    for n in newspaper_to_download:
        try:
            if taz_dl.download_newspaper(n, tmp_folder):
                newspaper_downloaded.append(n)
        except Exception as e:
            logging.error(f"Could not download {n}\n {e}")

    # Add downloaded newspaper to download_history.csv
    try:
        if newspaper_downloaded:
            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            new_rows = pd.DataFrame(
                {
                    'file': newspaper_downloaded,
                    'download_timestamp': [timestamp] * len(newspaper_downloaded),
                }
            )
            # DataFrame.append was removed in pandas 2.0 -> use concat. Empty frames
            # are skipped because concatenating them is deprecated in newer pandas.
            frames = [frame for frame in (df, new_rows) if not frame.empty]
            df = pd.concat(frames, ignore_index=True)
        # Written even when nothing was downloaded so dropped (outdated) rows are persisted
        df = df.sort_values(by='file', ascending=False)
        df.to_csv(history_path, index=False)
    except Exception as e:
        logging.error(f"Could not update download_history.csv\n {e}")

    newspaper_downloaded_string = "\n ".join(newspaper_downloaded)

    if config['nextcloud_webdav_url']:
        if not validators.url(config['nextcloud_webdav_url']):
            logging.error("Invalid url for Nextcloud webdav.")
            sys.exit(1)

        # The last path segment of the share URL is used as the WebDAV user name
        url = urlparse(config['nextcloud_webdav_url'])
        webdav_user = url.path.split("/")[-1]
        webdav_password = config['nextcloud_webdav_password']
        client = Client(f"{url.scheme}://{url.hostname}/public.php/webdav/",
                        auth=(webdav_user, webdav_password))

        uploaded = []
        for n in newspaper_downloaded:
            local_file = os.path.join(tmp_folder, n)
            try:
                client.upload_file(local_file, n)
                os.remove(local_file)
            except Exception as e:
                logging.error(f"Could not upload {n} to {url}\n {e}")
            else:
                uploaded.append(n)
        # Report only the files that were really uploaded
        if uploaded:
            uploaded_string = "\n ".join(uploaded)
            logging.info(f"Uploaded\n {uploaded_string}\n to {config['nextcloud_webdav_url']}")
        return

    # If neither a webdav url nor a download folder was provided, exit the program here
    if not config['download_folder']:
        logging.error(f"Please provide a download folder or a Nextcloud webdav url.\n {newspaper_downloaded_string}"
                      f"\n downloaded to {tmp_folder}")
        sys.exit(1)

    if not os.path.isdir(config['download_folder']):
        logging.error(f"{config['download_folder']} does not exists.\n {newspaper_downloaded_string}"
                      f"\n downloaded to {tmp_folder}")
        return

    # Move downloaded file to download folder
    download_folder = (
        config['download_folder']
        if config['download_folder'].endswith(os.path.sep)
        else config['download_folder'] + os.path.sep
    )
    moved = []
    for n in newspaper_downloaded:
        try:
            shutil.move(os.path.join(tmp_folder, n), download_folder)
        except Exception as e:
            logging.error(f"Could not move {n} to download folder \"{download_folder}\"\n {e}")
        else:
            moved.append(n)
    # Report only the files that were really moved
    if moved:
        moved_string = "\n ".join(moved)
        logging.info(f"Downloaded\n {moved_string}\n to {config['download_folder']}")
if __name__ == '__main__':
    # Log errors (and above) to tazPlease.log in the script directory
    log_file = os.path.join(dir_path, 'tazPlease.log')
    logging.basicConfig(filename=log_file, level=logging.ERROR, format='%(asctime)s - %(message)s')

    # Load the configuration; any failure is printed to stdout and ends the
    # program with exit code 1
    try:
        configuration = TazConfiguration().get_config()
    except (TazConfigurationError, Exception) as load_error:
        print(load_error)
        sys.exit(1)

    # Run only if a non-empty configuration was returned
    if configuration:
        main(configuration)