import os
import re

# Separator-aware boundaries. Plain ``\b`` treats "_" as a word character, so
# "movie_2018_720p" would never match; these lookarounds only require that the
# neighbouring character is not an ASCII letter or digit.
_SEP_BEFORE = r"(?<![A-Za-z0-9])"
_SEP_AFTER = r"(?![A-Za-z0-9])"

# Four-digit year starting with 19 or 20.
_YEAR_RE = re.compile(_SEP_BEFORE + r"(?:19|20)\d{2}" + _SEP_AFTER)

# Resolution tag (480p, 720p, 1080p, 2160p).
_RES_RE = re.compile(
    _SEP_BEFORE + r"(?:480p|720p|1080p|2160p)" + _SEP_AFTER,
    re.IGNORECASE,
)

# Release source (WebDL, Web-DL, BluRay, BRRip, HDRip).
_SOURCE_RE = re.compile(
    _SEP_BEFORE + r"(?:WebDL|Web-DL|BluRay|BRRip|HDRip)" + _SEP_AFTER,
    re.IGNORECASE,
)

# Canonical spelling for each recognised source, keyed by its lower-case form.
_SOURCE_LABELS = {
    "webdl": "Web-DL",
    "web-dl": "Web-DL",
    "bluray": "BluRay",
    "brrip": "BRRip",
    "hdrip": "HDRip",
}


def clean_media_filename(filename):
    """Build a readable label from a release-style media filename.

    The title is everything before the first four-digit year (19xx/20xx),
    with dots and underscores turned into spaces and Title Case applied.
    A resolution and a release source, when present, are combined into a
    bracketed quality tag such as ``[720p Web-DL]``.

    Args:
        filename: Raw filename, e.g. ``"Some.Movie.2018.720p.WebDL.x264"``.

    Returns:
        A string of the form ``"Title (Year) [quality tag]"``. Missing
        pieces fall back to ``"Unknown Title"`` / ``"Unknown Year"``, and
        the quality tag is omitted when neither resolution nor source is
        found.
    """
    year_match = _YEAR_RE.search(filename)
    if year_match:
        # Everything left of the first year is treated as the title.
        raw_title = filename[:year_match.start()]
        clean_title = raw_title.replace(".", " ").replace("_", " ").strip().title()
        year = year_match.group(0)
    else:
        clean_title = ""
        year = "Unknown Year"
    if not clean_title:
        # Covers both "no year found" and "year at the very start".
        clean_title = "Unknown Title"

    quality_parts = []
    res_match = _RES_RE.search(filename)
    if res_match:
        quality_parts.append(res_match.group(0).lower())
    source_match = _SOURCE_RE.search(filename)
    if source_match:
        matched = source_match.group(0)
        quality_parts.append(_SOURCE_LABELS.get(matched.lower(), matched))
    quality_tag = f"[{' '.join(quality_parts)}]" if quality_parts else ""

    # NOTE(review): the final assembly step was not present in the original
    # snippet; "Title (Year) [tag]" is an assumed output format - confirm.
    return f"{clean_title} ({year}) {quality_tag}".strip()


# --- Usage ---
# This sample has no separators at all, so none of the boundary-anchored
# patterns match and the placeholders are returned.
raw_filename = "defyingchase2018720pwebdlhindichinesex2"
result = clean_media_filename(raw_filename)