Simplify to whitelist/blacklist model
- Rewrite merge_blocklists.py to sync a single blacklist from upstream and subtract the locally maintained whitelist.
- Replace the whitelist contents with a subtitle + webm seed.
- Remove blacklist_permissive, whitelist_with_subtitles, and all .prev files that are no longer needed.
- Rewrite the README to reflect the two-file model and link to the wiki.
This commit is contained in:
+51
-33
@@ -1,49 +1,67 @@
|
||||
"""Sync the blacklist from upstream Cleanuparr, preserving manual local
|
||||
additions and stripping entries listed in the locally-maintained whitelist.
|
||||
|
||||
See the wiki (Sync) for the full algorithm and rationale.
|
||||
"""
|
||||
import urllib.request
|
||||
import os
|
||||
|
||||
files = {
|
||||
"blacklist": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist",
|
||||
"blacklist_permissive": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist_permissive",
|
||||
"whitelist": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/whitelist",
|
||||
"whitelist_with_subtitles": "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/whitelist_with_subtitles",
|
||||
}
|
||||
UPSTREAM_URL = "https://raw.githubusercontent.com/Cleanuparr/Cleanuparr/main/blacklist"
|
||||
BLACKLIST = "blacklist"
|
||||
BLACKLIST_PREV = "blacklist.prev"
|
||||
WHITELIST = "whitelist"
|
||||
|
||||
def merge_blocklist(filename, url):
|
||||
prev_file = f"{filename}.prev"
|
||||
|
||||
# Fetch new upstream
|
||||
with urllib.request.urlopen(url) as r:
|
||||
upstream_new = set(line.strip() for line in r.read().decode().splitlines() if line.strip())
|
||||
|
||||
# Read previous upstream (empty set if first run)
|
||||
def read_lines(path):
    """Read a file into a set of non-empty, whitespace-stripped lines.

    Args:
        path: Path of the text file to read.

    Returns:
        A set containing each distinct non-blank line of the file with
        leading and trailing whitespace removed. An empty set is returned
        when the file does not exist.
    """
    try:
        with open(path) as f:
            # Blank and whitespace-only lines are dropped; duplicates collapse
            # because the result is a set.
            return {line.strip() for line in f if line.strip()}
    except FileNotFoundError:
        return set()
|
||||
|
||||
|
||||
def main():
    """Sync the local blacklist with upstream and write the result to disk.

    Steps, in order:
      1. Download the upstream blacklist from ``UPSTREAM_URL``.
      2. Load the previous upstream snapshot (``BLACKLIST_PREV``), the current
         local blacklist (``BLACKLIST``) and the whitelist (``WHITELIST``).
      3. Treat every local entry absent from the previous snapshot as a manual
         addition, and merge those additions into the new upstream set.
      4. Remove every whitelist entry from the merged set.
      5. Write the result to ``BLACKLIST`` and the new upstream snapshot to
         ``BLACKLIST_PREV``, both sorted, one entry per line.

    A summary of what changed is printed for the workflow log.

    Raises:
        urllib.error.URLError: If the upstream blacklist cannot be fetched
            (including when the request times out).
    """
    # Fetch the current upstream blacklist. The timeout keeps a stalled
    # connection from hanging the workflow indefinitely.
    with urllib.request.urlopen(UPSTREAM_URL, timeout=30) as r:
        upstream_new = {
            line.strip() for line in r.read().decode().splitlines() if line.strip()
        }

    # Previous upstream snapshot: baseline for detecting local additions.
    # On first run (no snapshot on disk, or an empty one), use the current
    # upstream as the baseline so nothing upstream ships is treated as a
    # local addition.
    upstream_prev = read_lines(BLACKLIST_PREV)
    if not upstream_prev:
        upstream_prev = upstream_new.copy()

    # Current committed blacklist (may contain manual local additions)
    local = read_lines(BLACKLIST)

    # Locally-maintained whitelist (exclusion source)
    whitelist = read_lines(WHITELIST)

    # Three-way merge: anything in local but not in the previous upstream
    # snapshot is a manual local addition that must be preserved.
    custom = local - upstream_prev
    merged = upstream_new | custom

    # Strip whitelist entries from the merged result.
    result = merged - whitelist

    # Reporting for the workflow log
    print(f"[{BLACKLIST}] Upstream added: {sorted(upstream_new - upstream_prev)}")
    print(f"[{BLACKLIST}] Upstream removed: {sorted(upstream_prev - upstream_new)}")
    print(f"[{BLACKLIST}] Custom preserved: {sorted(custom)}")
    print(f"[{BLACKLIST}] Whitelist stripped: {sorted(merged & whitelist)}")

    # Write the merged blacklist, sorted for deterministic diffs
    with open(BLACKLIST, "w") as f:
        f.write("\n".join(sorted(result)) + "\n")

    # Store the new upstream snapshot for the next run
    with open(BLACKLIST_PREV, "w") as f:
        f.write("\n".join(sorted(upstream_new)) + "\n")
|
||||
|
||||
for filename, url in files.items():
|
||||
merge_blocklist(filename, url)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user