Skip to content

API Reference

This section contains the automatically generated API documentation for MKV Episode Matcher.

Core Modules

mkv_episode_matcher.__main__

mkv_episode_matcher.cli

Unified CLI Interface for MKV Episode Matcher V2

This module provides a single, intuitive command-line interface that handles all use cases with intelligent auto-detection and minimal configuration.

Functions

print_banner

print_banner()

Print application banner.

Source code in mkv_episode_matcher/cli.py
def print_banner():
    """Render the application banner inside a framed panel on the console."""
    title = Text("MKV Episode Matcher", style="bold blue")
    framed = Panel(title, subtitle="Intelligent episode matching with zero-config setup")
    console.print(framed)

match

match(
    path=typer.Argument(
        ...,
        help="Path to MKV file, series folder, or entire library",
        exists=True,
    ),
    season=typer.Option(
        None,
        "--season",
        "-s",
        help="Override season number for all files",
    ),
    recursive=typer.Option(
        True,
        "--recursive/--no-recursive",
        "-r/-nr",
        help="Search recursively in directories",
    ),
    dry_run=typer.Option(
        False,
        "--dry-run",
        "-d",
        help="Preview changes without renaming files",
    ),
    output_dir=typer.Option(
        None,
        "--output-dir",
        "-o",
        help="Copy renamed files to this directory instead of renaming in place",
    ),
    json_output=typer.Option(
        False,
        "--json",
        help="Output results in JSON format for automation",
    ),
    confidence_threshold=typer.Option(
        None,
        "--confidence",
        "-c",
        min=0.0,
        max=1.0,
        help="Minimum confidence score for matches (0.0-1.0)",
    ),
    download_subs=typer.Option(
        True,
        "--download-subs/--no-download-subs",
        help="Automatically download subtitles if not found locally",
    ),
    tmdb_id=typer.Option(
        None,
        "--tmdb-id",
        help="Manually specify the TMDB Show ID (e.g. 549 for Law & Order)",
    ),
    log_level=typer.Option(
        "INFO",
        "--log-level",
        "-l",
        help="Set logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
        case_sensitive=False,
    ),
)

Process MKV files with intelligent episode matching.

Automatically detects whether you're processing:

- A single file
- A series folder
- An entire library

Examples:

# Process a single file
mkv-match episode.mkv

# Process a series season
mkv-match "/media/Breaking Bad/Season 1/"

# Process entire library
mkv-match /media/tv-shows/ --recursive

# Dry run with custom output
mkv-match episode.mkv --dry-run --output-dir ./renamed/

# Automation mode
mkv-match show/ --json --confidence 0.8
Source code in mkv_episode_matcher/cli.py
@app.command()
def match(
    path: Path = typer.Argument(
        ..., help="Path to MKV file, series folder, or entire library", exists=True
    ),
    # Core options
    season: int | None = typer.Option(
        None, "--season", "-s", help="Override season number for all files"
    ),
    recursive: bool = typer.Option(
        True,
        "--recursive/--no-recursive",
        "-r/-nr",
        help="Search recursively in directories",
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", "-d", help="Preview changes without renaming files"
    ),
    # Output options
    output_dir: Path | None = typer.Option(
        None,
        "--output-dir",
        "-o",
        help="Copy renamed files to this directory instead of renaming in place",
    ),
    json_output: bool = typer.Option(
        False, "--json", help="Output results in JSON format for automation"
    ),
    # Quality options
    confidence_threshold: float | None = typer.Option(
        None,
        "--confidence",
        "-c",
        min=0.0,
        max=1.0,
        help="Minimum confidence score for matches (0.0-1.0)",
    ),
    # Subtitle options
    download_subs: bool = typer.Option(
        True,
        "--download-subs/--no-download-subs",
        help="Automatically download subtitles if not found locally",
    ),
    # TMDB options
    tmdb_id: int | None = typer.Option(
        None,
        "--tmdb-id",
        help="Manually specify the TMDB Show ID (e.g. 549 for Law & Order)",
    ),
    # Logging options
    log_level: str = typer.Option(
        "INFO",
        "--log-level",
        "-l",
        help="Set logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
        case_sensitive=False,
    ),
):
    """
    Process MKV files with intelligent episode matching.

    Automatically detects whether you're processing:
    • A single file
    • A series folder
    • An entire library

    Examples:

        # Process a single file
        mkv-match episode.mkv

        # Process a series season
        mkv-match "/media/Breaking Bad/Season 1/"

        # Process entire library
        mkv-match /media/tv-shows/ --recursive

        # Dry run with custom output
        mkv-match episode.mkv --dry-run --output-dir ./renamed/

        # Automation mode
        mkv-match show/ --json --confidence 0.8
    """

    # Configure logging level first so every later step logs at the right level.
    log_level = log_level.upper()
    valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    if log_level not in valid_levels:
        # BUGFIX: this error path previously printed rich console markup even
        # when --json was requested; every other error path in this command
        # branches on json_output, so do the same here for automation callers.
        message = f"Invalid log level: {log_level}. Must be one of {', '.join(valid_levels)}"
        if json_output:
            print(json.dumps({"error": message}))
        else:
            console.print(f"[red]{message}[/red]")
        sys.exit(1)

    logger.remove()
    logger.add(sys.stderr, level=log_level)

    # Keep stdout clean in JSON mode (machine-readable output only).
    if not json_output:
        print_banner()

    # Load configuration; CLI options take precedence over saved settings.
    try:
        cm = get_config_manager()
        config = cm.load()

        # Override config with CLI options
        if confidence_threshold is not None:
            config.min_confidence = confidence_threshold

        if not download_subs:
            config.sub_provider = "local"

    except Exception as e:
        if json_output:
            print(json.dumps({"error": f"Configuration error: {e}"}))
        else:
            console.print(f"[red]Configuration error: {e}[/red]")
        sys.exit(1)

    # Initialize engine
    try:
        engine = MatchEngineV2(config)
    except Exception as e:
        if json_output:
            print(json.dumps({"error": f"Engine initialization failed: {e}"}))
        else:
            console.print(f"[red]Failed to initialize engine: {e}[/red]")
        sys.exit(1)

    # Detect processing mode: single file, series folder, or whole library.
    if path.is_file():
        mode = "single_file"
    elif path.is_dir():
        # Count MKV files to determine if it's a series or library
        mkv_count = len(list(path.rglob("*.mkv") if recursive else path.glob("*.mkv")))
        if mkv_count == 0:
            if json_output:
                print(json.dumps({"error": "No MKV files found"}))
            else:
                console.print("[yellow]No MKV files found[/yellow]")
            sys.exit(0)
        elif mkv_count <= 30:  # Arbitrary threshold
            mode = "series_folder"
        else:
            mode = "library"
    else:
        if json_output:
            print(json.dumps({"error": "Invalid path"}))
        else:
            console.print("[red]Invalid path[/red]")
        sys.exit(1)

    if not json_output:
        mode_descriptions = {
            "single_file": "Processing single file",
            "series_folder": "Processing series folder",
            "library": "Processing entire library",
        }
        console.print(f"[blue]{mode_descriptions[mode]}[/blue]: {path}")

        if dry_run:
            console.print("[yellow]DRY RUN MODE - No files will be renamed[/yellow]")

    # Process files
    try:
        results, failures = engine.process_path(
            path=path,
            season_override=season,
            recursive=recursive,
            dry_run=dry_run,
            output_dir=output_dir,
            json_output=json_output,
            confidence_threshold=confidence_threshold,
            tmdb_id=tmdb_id,
        )

        # Output results
        if json_output:
            output_data = {
                "mode": mode,
                "path": str(path),
                "total_matches": len(results),
                "total_failures": len(failures),
                "dry_run": dry_run,
                "results": json.loads(engine.export_results(results)),
                "failures": [
                    {
                        "original_file": str(f.original_file),
                        "reason": f.reason,
                        "confidence": f.confidence,
                    }
                    for f in failures
                ],
            }
            print(json.dumps(output_data, indent=2))
        else:
            # Rich console summary
            if results or failures:
                _display_comprehensive_summary(
                    results, failures, dry_run, output_dir, console
                )
            else:
                console.print("[yellow]No MKV files processed[/yellow]")

    except Exception as e:
        if json_output:
            print(json.dumps({"error": f"Processing failed: {e}"}))
        else:
            console.print(f"[red]Processing failed: {e}[/red]")
        sys.exit(1)

config

config(
    show_cache_dir=typer.Option(
        False,
        "--show-cache-dir",
        help="Show current cache directory location",
    ),
    reset=typer.Option(
        False,
        "--reset",
        help="Reset configuration to defaults",
    ),
)

Configure MKV Episode Matcher settings.

Most settings are auto-configured, but you can customize:

- Cache directory location
- Default confidence thresholds
- ASR model preferences

Source code in mkv_episode_matcher/cli.py
@app.command()
def config(
    show_cache_dir: bool = typer.Option(
        False, "--show-cache-dir", help="Show current cache directory location"
    ),
    reset: bool = typer.Option(
        False, "--reset", help="Reset configuration to defaults"
    ),
):
    """
    Configure MKV Episode Matcher settings.

    Most settings are auto-configured, but you can customize:
    • Cache directory location
    • Default confidence thresholds
    • ASR model preferences

    With no flags, this runs an interactive prompt session (cache dir,
    confidence threshold, ASR/subtitle providers, optional API keys) and
    persists the result via the config manager. Invalid or blank answers
    silently keep the previously stored value.
    """

    cm = get_config_manager()

    # --show-cache-dir: report the configured location and exit.
    if show_cache_dir:
        config = cm.load()
        console.print(f"Cache directory: [blue]{config.cache_dir}[/blue]")
        return

    # --reset: overwrite the saved configuration with defaults and exit.
    if reset:
        config = Config()  # Default config
        cm.save(config)
        console.print("[green]Configuration reset to defaults[/green]")
        return

    # Interactive configuration
    console.print(Panel("MKV Episode Matcher Configuration"))

    config = cm.load()

    # Cache directory: only updated when the answer differs from the current value.
    current_cache = str(config.cache_dir)
    new_cache = typer.prompt(
        "Cache directory", default=current_cache, show_default=True
    )
    if new_cache != current_cache:
        config.cache_dir = Path(new_cache)

    # Confidence threshold: out-of-range answers are ignored (old value kept).
    current_confidence = config.min_confidence
    new_confidence = typer.prompt(
        "Minimum confidence threshold (0.0-1.0)",
        type=float,
        default=current_confidence,
        show_default=True,
    )
    if 0.0 <= new_confidence <= 1.0:
        config.min_confidence = new_confidence

    # ASR provider: only "parakeet" is accepted; anything else keeps the old value.
    current_asr = config.asr_provider
    new_asr = typer.prompt(
        "ASR provider (parakeet)",
        default=current_asr,
        show_default=True,
    )
    if new_asr in ["parakeet"]:
        config.asr_provider = new_asr

    # Subtitle provider: unrecognized answers keep the old value.
    current_sub = config.sub_provider
    new_sub = typer.prompt(
        "Subtitle provider (local/opensubtitles)",
        default=current_sub,
        show_default=True,
    )
    if new_sub in ["local", "opensubtitles"]:
        config.sub_provider = new_sub

    # OpenSubtitles credentials: only prompted when that provider is selected;
    # blank answers leave the stored values untouched.
    if config.sub_provider == "opensubtitles":
        console.print("\n[bold]OpenSubtitles Configuration:[/bold]")

        current_api = config.open_subtitles_api_key or ""
        new_api = typer.prompt("API Key", default=current_api, show_default=True)
        if new_api.strip():
            config.open_subtitles_api_key = new_api.strip()

        current_user = config.open_subtitles_username or ""
        new_user = typer.prompt("Username", default=current_user, show_default=True)
        if new_user.strip():
            config.open_subtitles_username = new_user.strip()

        # Password input is hidden and never echoed as a default.
        current_pass = config.open_subtitles_password or ""
        new_pass = typer.prompt(
            "Password", default=current_pass, show_default=False, hide_input=True
        )
        if new_pass.strip():
            config.open_subtitles_password = new_pass.strip()

    # TMDB API key (optional): blank answer keeps the stored key.
    current_tmdb = config.tmdb_api_key or ""
    new_tmdb = typer.prompt(
        "TMDb API key (optional, for episode titles)",
        default=current_tmdb,
        show_default=False,
    )
    if new_tmdb.strip():
        config.tmdb_api_key = new_tmdb.strip()

    # Save configuration
    cm.save(config)
    console.print("[green]Configuration saved successfully[/green]")

info

info()

Show system information and available models.

Source code in mkv_episode_matcher/cli.py
@app.command()
def info():
    """
    Show system information and available models.

    Prints the availability of each ASR model backend and the currently
    loaded configuration. Errors in either section are reported but do not
    abort the command.
    """
    console.print(Panel("MKV Episode Matcher - System Information"))

    try:
        from mkv_episode_matcher.asr_models import list_available_models

        models = list_available_models()

        console.print("\n[bold]Available ASR Models:[/bold]")
        # NOTE: the loop variable was previously named ``info``, shadowing
        # this command function itself; renamed to ``model_info``.
        for model_type, model_info in models.items():
            if model_info.get("available"):
                status = "[green]Available[/green]"
                model_list = ", ".join(model_info.get("models", [])[:3])  # Show first 3
                console.print(f"  {model_type}: {status}")
                console.print(f"    Models: {model_list}")
            else:
                status = "[red]Not available[/red]"
                error = model_info.get("error", "Unknown error")
                console.print(f"  {model_type}: {status} ({error})")

    except Exception as e:
        console.print(f"[red]Error checking models: {e}[/red]")

    # Configuration info
    try:
        cm = get_config_manager()
        config = cm.load()

        console.print("\n[bold]Current Configuration:[/bold]")
        console.print(f"  Cache directory: {config.cache_dir}")
        console.print(f"  ASR provider: {config.asr_provider}")
        console.print(f"  Subtitle provider: {config.sub_provider}")
        console.print(f"  Confidence threshold: {config.min_confidence}")

    except Exception as e:
        console.print(f"[red]Error loading config: {e}[/red]")

version

version()

Show version information.

Source code in mkv_episode_matcher/cli.py
@app.command()
def version():
    """Print the installed MKV Episode Matcher version."""
    import mkv_episode_matcher

    # Fall back to "unknown" when the package exposes no __version__ attribute.
    pkg_version = getattr(mkv_episode_matcher, "__version__", "unknown")
    console.print(f"MKV Episode Matcher v{pkg_version}")

gui

gui()

Launch the GUI application.

Source code in mkv_episode_matcher/cli.py
@app.command()
def gui():
    """Launch the GUI application (Flet-based desktop UI)."""
    # Imported lazily so plain CLI usage does not require flet to be installed.
    import flet as ft

    from mkv_episode_matcher.ui.flet_app import main

    ft.app(target=main)

mkv_episode_matcher.episode_identification

Classes

SubtitleCache

SubtitleCache()

Cache for storing parsed subtitle data to avoid repeated loading and parsing.

Source code in mkv_episode_matcher/episode_identification.py
def __init__(self):
    """Initialize empty caches for parsed files and extracted chunks."""
    # Parsed subtitle contents, keyed by file path string.
    self.subtitles = {}
    # Joined chunk text, keyed by (file_path, chunk_idx).
    self.chunk_cache = {}
Functions
get_subtitle_content
get_subtitle_content(srt_file)

Get the full content of a subtitle file, loading it only once.

Source code in mkv_episode_matcher/episode_identification.py
def get_subtitle_content(self, srt_file):
    """Return the content of ``srt_file``, parsing the file at most once."""
    key = str(srt_file)
    # Populate the cache lazily on first access.
    if key not in self.subtitles:
        self.subtitles[key] = SubtitleReader().read_srt_file(key)
    return self.subtitles[key]
get_chunk
get_chunk(srt_file, chunk_idx, chunk_start, chunk_end)

Get a specific time chunk from a subtitle file, with caching.

Source code in mkv_episode_matcher/episode_identification.py
def get_chunk(self, srt_file, chunk_idx, chunk_start, chunk_end):
    """Return the joined subtitle text for one time window, cached per chunk."""
    key = (str(srt_file), chunk_idx)

    cached = self.chunk_cache.get(key)
    if cached is None:
        # Extract the window's cues from the (cached) full file content.
        content = self.get_subtitle_content(str(srt_file))
        lines = SubtitleReader().extract_subtitle_chunk(content, chunk_start, chunk_end)
        cached = " ".join(lines)
        self.chunk_cache[key] = cached

    return cached

EpisodeMatcher

EpisodeMatcher(
    cache_dir, show_name, min_confidence=0.6, device=None
)
Source code in mkv_episode_matcher/episode_identification.py
def __init__(self, cache_dir, show_name, min_confidence=0.6, device=None):
    """Set up matcher state, caches, and a temp workspace for audio chunks."""
    self.cache_dir = Path(cache_dir)
    self.show_name = show_name
    self.min_confidence = min_confidence
    # Audio is analysed in fixed 30 s windows, starting 300 s into the file.
    self.chunk_duration = 30
    self.skip_initial_duration = 300
    # Prefer CUDA when available unless the caller supplied a device.
    self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
    self.temp_dir.mkdir(exist_ok=True)
    # Per-instance caches to avoid repeated parsing, extraction, and globbing.
    self.subtitle_cache = SubtitleCache()  # parsed subtitle data
    self.audio_chunks = {}  # extracted audio chunk file paths
    self.reference_files_cache = {}  # per-season reference file lists
Functions
extract_audio_chunk
extract_audio_chunk(mkv_file, start_time)

Extract a chunk of audio from MKV file with caching.

Source code in mkv_episode_matcher/episode_identification.py
def extract_audio_chunk(self, mkv_file, start_time):
    """Extract a mono 16 kHz PCM WAV chunk from ``mkv_file`` at ``start_time``.

    The chunk is ``self.chunk_duration`` seconds long. Results are cached in
    memory per ``(file, offset)`` and on disk under ``self.temp_dir``.

    Args:
        mkv_file (str or Path): Source video file.
        start_time: Chunk start offset in seconds.

    Returns:
        str: Path to the extracted WAV file.

    Raises:
        RuntimeError: If FFmpeg fails, times out, or produces no output file.
    """
    import hashlib

    cache_key = (str(mkv_file), start_time)

    if cache_key in self.audio_chunks:
        return self.audio_chunks[cache_key]

    # BUGFIX: the on-disk name previously encoded only the offset
    # ("chunk_{start_time}.wav"), so chunks extracted from *different* MKV
    # files at the same offset collided and the stale file was silently
    # reused. Embed a digest of the source path to keep chunks distinct.
    source_tag = hashlib.md5(str(mkv_file).encode("utf-8")).hexdigest()[:12]
    chunk_path = self.temp_dir / f"chunk_{source_tag}_{start_time}.wav"
    if not chunk_path.exists():
        cmd = [
            "ffmpeg",
            "-ss",
            str(start_time),
            "-t",
            str(self.chunk_duration),
            "-i",
            str(mkv_file),
            "-vn",  # Disable video
            "-sn",  # Disable subtitles
            "-dn",  # Disable data streams
            "-acodec",
            "pcm_s16le",
            "-ar",
            "16000",
            "-ac",
            "1",
            "-y",  # Overwrite output files without asking
            str(chunk_path),
        ]

        try:
            logger.debug(
                f"Extracting audio chunk from {mkv_file} at {start_time}s using FFmpeg"
            )
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

            if result.returncode != 0:
                error_msg = f"FFmpeg failed with return code {result.returncode}"
                if result.stderr:
                    error_msg += f". Error: {result.stderr.strip()}"
                logger.error(error_msg)
                logger.debug(f"FFmpeg command: {' '.join(cmd)}")
                raise RuntimeError(error_msg)

            # Check if the output file was actually created and has content
            if not chunk_path.exists():
                error_msg = f"FFmpeg completed but output file was not created: {chunk_path}"
                logger.error(error_msg)
                raise RuntimeError(error_msg)

            # Check if the file has meaningful content (at least 1KB)
            if chunk_path.stat().st_size < 1024:
                error_msg = f"Generated audio chunk is too small ({chunk_path.stat().st_size} bytes), likely corrupted"
                logger.warning(error_msg)
                # Don't raise an error for small files, but log the warning

            logger.debug(
                f"Successfully extracted {chunk_path.stat().st_size} byte audio chunk"
            )

        except subprocess.TimeoutExpired as e:
            error_msg = f"FFmpeg timed out after 30 seconds while extracting audio from {mkv_file}"
            logger.error(error_msg)
            raise RuntimeError(error_msg) from e

        except Exception as e:
            error_msg = f"Failed to extract audio chunk from {mkv_file} at {start_time}s: {str(e)}"
            logger.error(error_msg)
            # Clean up partial file if it exists
            if chunk_path.exists():
                try:
                    chunk_path.unlink()
                except Exception as cleanup_error:
                    logger.warning(
                        f"Failed to clean up partial file {chunk_path}: {cleanup_error}"
                    )
            raise RuntimeError(error_msg) from e

    chunk_path_str = str(chunk_path)
    self.audio_chunks[cache_key] = chunk_path_str
    return chunk_path_str
load_reference_chunk
load_reference_chunk(srt_file, chunk_idx)

Load reference subtitles for a specific time chunk with caching.

PARAMETER DESCRIPTION
srt_file

Path to the SRT file

TYPE: str or Path

chunk_idx

Index of the chunk to load

TYPE: int

RETURNS DESCRIPTION
str

Combined text from the subtitle chunk

Source code in mkv_episode_matcher/episode_identification.py
def load_reference_chunk(self, srt_file, chunk_idx):
    """
    Return the combined subtitle text for one time chunk of ``srt_file``.

    Args:
        srt_file (str or Path): Path to the SRT file.
        chunk_idx (int): Zero-based index of the chunk to load.

    Returns:
        str: Combined text for the chunk, or "" when loading fails.
    """
    try:
        # Chunk offsets mirror _try_match_with_model: windows start after
        # the initial skip period and advance by one chunk duration each.
        window_start = self.skip_initial_duration + chunk_idx * self.chunk_duration
        window_end = window_start + self.chunk_duration
        return self.subtitle_cache.get_chunk(
            srt_file, chunk_idx, window_start, window_end
        )
    except Exception as e:
        logger.error(f"Error loading reference chunk from {srt_file}: {e}")
        return ""
get_reference_files
get_reference_files(season_number)

Get reference subtitle files with caching.

Source code in mkv_episode_matcher/episode_identification.py
def get_reference_files(self, season_number):
    """Return reference subtitle files for ``season_number``, with caching.

    Scans the show's cache directory once for ``.srt``/``.SRT`` files and
    keeps those whose names match common episode-numbering styles for the
    season (S01E.., S1E.., 01x.., 1x..), case-insensitively.

    Args:
        season_number (int): Season to look up.

    Returns:
        list[Path]: Matching subtitle files, duplicates removed, order preserved.
    """
    cache_key = (self.show_name, season_number)
    logger.debug(f"Reference cache key: {cache_key}")

    if cache_key in self.reference_files_cache:
        logger.debug("Returning cached reference files")
        return self.reference_files_cache[cache_key]

    reference_dir = self.cache_dir / "data" / self.show_name
    patterns = [
        f"S{season_number:02d}E",
        f"S{season_number}E",
        f"{season_number:02d}x",
        f"{season_number}x",
    ]

    # PERF: scan the directory once; the original re-globbed it on every
    # pattern iteration (4 identical directory scans).
    # Cover both .srt and .SRT for case-sensitive filesystems.
    srt_files = list(reference_dir.glob("*.srt")) + list(
        reference_dir.glob("*.SRT")
    )

    reference_files = []
    for pattern in patterns:
        reference_files.extend(
            f
            for f in srt_files
            if re.search(f"{pattern}\\d+", f.name, re.IGNORECASE)
        )

    # Remove duplicates while preserving order (patterns overlap, e.g. a
    # single-digit season matches both the padded and unpadded forms).
    reference_files = list(dict.fromkeys(reference_files))
    logger.debug(
        f"Found {len(reference_files)} reference files for season {season_number}"
    )
    self.reference_files_cache[cache_key] = reference_files
    return reference_files
identify_episode
identify_episode(video_file, temp_dir, season_number)

Progressive episode identification with faster initial attempt.

Source code in mkv_episode_matcher/episode_identification.py
def identify_episode(self, video_file, temp_dir, season_number):
    """Progressive episode identification with faster initial attempt.

    Args:
        video_file (str or Path): MKV file to identify.
        temp_dir: Not used by this implementation; presumably kept for
            caller compatibility — TODO confirm against call sites.
        season_number (int): Season whose reference subtitles are compared.

    Returns:
        dict or None: Match details (includes 'matched_at' seconds and
        'confidence') on success; None when no match is found or an error
        occurs. All extracted audio chunks are deleted on exit either way.
    """
    try:
        # Get reference files first with caching; without references there
        # is nothing to match against.
        reference_files = self.get_reference_files(season_number)

        if not reference_files:
            logger.error(f"No reference files found for season {season_number}")
            return None

        # Cache video duration (bounds how much audio we scan below).
        try:
            duration = get_video_duration(video_file)
        except Exception as e:
            logger.error(f"Failed to get video duration for {video_file}: {e}")
            return None

        # Try with Parakeet CTC model
        logger.info("Attempting match with Parakeet CTC model...")
        try:
            match = self._try_match_with_model(
                video_file,
                {
                    "type": "parakeet",
                    "name": "nvidia/parakeet-ctc-0.6b",
                    "device": self.device,
                },
                min(duration, 600),  # Allow up to 10 minutes
                reference_files,
            )
            if match:
                logger.info(
                    f"Successfully matched with Parakeet CTC model at {match['matched_at']}s (confidence: {match['confidence']:.2f})"
                )
                return match
        except Exception as e:
            # Model failure is non-fatal: fall through to the no-match path.
            logger.warning(f"Parakeet CTC model failed: {e}")

        logger.info(
            "Speech recognition match failed - no models were able to process this file"
        )
        return None

    except Exception as e:
        logger.error(
            f"Unexpected error during episode identification for {video_file}: {e}"
        )
        return None

    finally:
        # Cleanup temp files - keep this limited to only files we know we created
        for chunk_info in self.audio_chunks.values():
            try:
                Path(chunk_info).unlink(missing_ok=True)
            except Exception as e:
                logger.warning(f"Failed to delete temp file {chunk_info}: {e}")

SubtitleReader

Helper class for reading and parsing subtitle files.

Functions
parse_timestamp staticmethod
parse_timestamp(timestamp)

Parse SRT timestamp into seconds.

Source code in mkv_episode_matcher/episode_identification.py
@staticmethod
def parse_timestamp(timestamp):
    """Parse SRT timestamp into seconds."""
    hours, minutes, seconds = timestamp.replace(",", ".").split(":")
    return float(hours) * 3600 + float(minutes) * 60 + float(seconds)
read_srt_file staticmethod
read_srt_file(file_path)

Read an SRT file and return its contents with robust encoding handling.

PARAMETER DESCRIPTION
file_path

Path to the SRT file

TYPE: str or Path

RETURNS DESCRIPTION
str

Contents of the SRT file

Source code in mkv_episode_matcher/episode_identification.py
@staticmethod
def read_srt_file(file_path):
    """
    Read an SRT file and return its contents with robust encoding handling.

    Delegates to ``read_file_with_fallback``, which tries the detected
    encoding first and then a list of common subtitle encodings.

    Args:
        file_path (str or Path): Path to the SRT file

    Returns:
        str: Contents of the SRT file
    """
    return read_file_with_fallback(file_path)
extract_subtitle_chunk staticmethod
extract_subtitle_chunk(content, start_time, end_time)

Extract subtitle text for a specific time window.

PARAMETER DESCRIPTION
content

Full SRT file content

TYPE: str

start_time

Chunk start time in seconds

TYPE: float

end_time

Chunk end time in seconds

TYPE: float

RETURNS DESCRIPTION
list

List of subtitle texts within the time window

Source code in mkv_episode_matcher/episode_identification.py
@staticmethod
def extract_subtitle_chunk(content, start_time, end_time):
    """
    Collect subtitle texts overlapping the window [start_time, end_time].

    Args:
        content (str): Full SRT file content.
        start_time (float): Window start in seconds.
        end_time (float): Window end in seconds.

    Returns:
        list: Subtitle text strings whose cues overlap the window.
    """
    collected = []

    # SRT blocks are separated by blank lines: index, timing, then text.
    for block in content.strip().split("\n\n"):
        lines = block.split("\n")
        if len(lines) < 3 or "-->" not in lines[1]:
            continue

        try:
            parts = lines[1].split(" --> ")
            cue_start = SubtitleReader.parse_timestamp(parts[0].strip())
            cue_end = SubtitleReader.parse_timestamp(parts[1].strip())

            # Keep any cue whose interval overlaps the requested window.
            if cue_end >= start_time and cue_start <= end_time:
                collected.append(" ".join(lines[2:]))

        except (IndexError, ValueError) as e:
            # Malformed block: log and move on to the next cue.
            logger.warning(f"Error parsing subtitle block: {e}")
            continue

    return collected

Functions

get_video_duration cached

get_video_duration(video_file)

Get video duration with caching and error handling.

Source code in mkv_episode_matcher/episode_identification.py
@lru_cache(maxsize=100)
def get_video_duration(video_file):
    """Get video duration with caching and error handling.

    Args:
        video_file (str or Path): Video file to probe. Must be hashable
            because results are memoized via ``lru_cache``.

    Returns:
        int: Duration in whole seconds, rounded up.

    Raises:
        RuntimeError: If ffprobe fails, times out, or returns an
            unparseable or non-positive duration.
    """
    try:
        logger.debug(f"Getting duration for video file: {video_file}")
        # Ask ffprobe for only the container duration, printed as a bare value.
        result = subprocess.run(
            [
                "ffprobe",
                "-v",
                "error",
                "-show_entries",
                "format=duration",
                "-of",
                "default=noprint_wrappers=1:nokey=1",
                str(video_file),
            ],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if result.returncode != 0:
            error_msg = f"ffprobe failed with return code {result.returncode}"
            if result.stderr:
                error_msg += f". Error: {result.stderr.strip()}"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        duration_str = result.stdout.strip()
        if not duration_str:
            raise RuntimeError("ffprobe returned empty duration")

        duration = float(duration_str)
        if duration <= 0:
            raise RuntimeError(f"Invalid duration: {duration}")

        # Round up so partial trailing seconds still count as a full second.
        result_duration = int(np.ceil(duration))
        logger.debug(f"Video duration: {result_duration} seconds")
        return result_duration

    except subprocess.TimeoutExpired as e:
        error_msg = f"ffprobe timed out while getting duration for {video_file}"
        logger.error(error_msg)
        raise RuntimeError(error_msg) from e
    except ValueError as e:
        # float() failed on ffprobe's output.
        error_msg = (
            f"Failed to parse duration from ffprobe output for {video_file}: {e}"
        )
        logger.error(error_msg)
        raise RuntimeError(error_msg) from e
    except Exception as e:
        # Normalize any other failure to RuntimeError for callers.
        error_msg = f"Unexpected error getting video duration for {video_file}: {e}"
        logger.error(error_msg)
        raise RuntimeError(error_msg) from e

detect_file_encoding

detect_file_encoding(file_path)

Detect the encoding of a file using chardet.

PARAMETER DESCRIPTION
file_path

Path to the file

TYPE: str or Path

RETURNS DESCRIPTION
str

Detected encoding, defaults to 'utf-8' if detection fails

Source code in mkv_episode_matcher/episode_identification.py
def detect_file_encoding(file_path):
    """
    Detect the encoding of a file using chardet.

    Args:
        file_path (str or Path): Path to the file

    Returns:
        str: Detected encoding, defaults to 'utf-8' if detection fails
    """
    try:
        # Sample at most the first 1MB: plenty for chardet while bounding memory.
        sample_size = min(1024 * 1024, Path(file_path).stat().st_size)
        with open(file_path, "rb") as handle:
            sample = handle.read(sample_size)

        detection = chardet.detect(sample)
        encoding = detection["encoding"]
        confidence = detection["confidence"]

        logger.debug(
            f"Detected encoding {encoding} with {confidence:.2%} confidence for {file_path}"
        )
        # chardet may return None for undetectable content; fall back to UTF-8.
        return encoding if encoding else "utf-8"
    except Exception as e:
        # Best-effort helper: never raise, just default to UTF-8.
        logger.warning(f"Error detecting encoding for {file_path}: {e}")
        return "utf-8"

read_file_with_fallback cached

read_file_with_fallback(file_path, encodings=None)

Read a file trying multiple encodings in order of preference.

PARAMETER DESCRIPTION
file_path

Path to the file

TYPE: str or Path

encodings

List of encodings to try, defaults to common subtitle encodings

TYPE: list DEFAULT: None

RETURNS DESCRIPTION
str

File contents

RAISES DESCRIPTION
ValueError

If file cannot be read with any encoding

Source code in mkv_episode_matcher/episode_identification.py
def read_file_with_fallback(file_path, encodings=None):
    """
    Read a file trying multiple encodings in order of preference.

    Results are cached (per path + encoding list), so repeated reads of the
    same subtitle file are cheap.

    Args:
        file_path (str or Path): Path to the file
        encodings (list): List of encodings to try, defaults to common subtitle encodings

    Returns:
        str: File contents

    Raises:
        ValueError: If file cannot be read with any encoding
    """
    if encodings is None:
        # First try detected encoding, then fallback to common subtitle encodings
        detected = detect_file_encoding(file_path)
        encodings = [detected, "utf-8", "latin-1", "cp1252", "iso-8859-1"]

    # BUGFIX: the original put @lru_cache directly on this function, which
    # raised TypeError whenever a caller passed `encodings` as a (documented)
    # list, since lists are unhashable. Normalize to hashable types here and
    # cache in a private worker instead.
    return _read_file_cached(str(file_path), tuple(encodings))


@lru_cache(maxsize=100)
def _read_file_cached(file_path, encodings):
    """Cached worker for read_file_with_fallback; accepts only hashable args."""
    file_path = Path(file_path)
    errors = []

    for encoding in encodings:
        try:
            with open(file_path, encoding=encoding) as f:
                content = f.read()
            logger.debug(f"Successfully read {file_path} using {encoding} encoding")
            return content
        except UnicodeDecodeError as e:
            # Remember why this encoding failed; try the next candidate.
            errors.append(f"{encoding}: {str(e)}")
            continue

    error_msg = f"Failed to read {file_path} with any encoding. Errors:\n" + "\n".join(
        errors
    )
    logger.error(error_msg)
    raise ValueError(error_msg)

mkv_episode_matcher.asr_models

ASR Model Abstraction Layer

This module provides a unified interface for different Automatic Speech Recognition models, including OpenAI Whisper and NVIDIA Parakeet models.

Classes

ASRModel

ASRModel(model_name, device=None)

Bases: ABC

Abstract base class for ASR models.

Initialize ASR model.

PARAMETER DESCRIPTION
model_name

Name/identifier of the model

TYPE: str

device

Device to run on ('cpu', 'cuda', or None for auto-detect)

TYPE: str | None DEFAULT: None

Source code in mkv_episode_matcher/asr_models.py
def __init__(self, model_name: str, device: str | None = None):
    """
    Initialize ASR model.

    Args:
        model_name: Name/identifier of the model
        device: Device to run on ('cpu', 'cuda', or None for auto-detect)
    """
    self.model_name = model_name
    # No explicit device: fall back to subclass-provided auto-detection.
    self.device = device or self._get_default_device()
    # Populated lazily by load(); None means "not loaded yet".
    self._model = None
Attributes
is_loaded property
is_loaded

Check if model is loaded.

Functions
load abstractmethod
load()

Load the model. Should be called before transcription.

Source code in mkv_episode_matcher/asr_models.py
@abc.abstractmethod
def load(self):
    """Load the model; must be invoked before any transcription call."""
transcribe abstractmethod
transcribe(audio_path)

Transcribe audio file.

PARAMETER DESCRIPTION
audio_path

Path to audio file

TYPE: str | Path

RETURNS DESCRIPTION
dict

Dictionary with at least 'text' key containing transcription

Source code in mkv_episode_matcher/asr_models.py
@abc.abstractmethod
def transcribe(self, audio_path: str | Path) -> dict:
    """
    Transcribe an audio file.

    Args:
        audio_path: Path to the audio file to transcribe

    Returns:
        Dictionary containing at least a 'text' key with the transcription
    """
calculate_match_score
calculate_match_score(transcription, reference)

Calculate similarity score between transcription and reference.

PARAMETER DESCRIPTION
transcription

Transcribed text

TYPE: str

reference

Reference subtitle text

TYPE: str

RETURNS DESCRIPTION
float

Float score between 0.0 and 1.0

Source code in mkv_episode_matcher/asr_models.py
def calculate_match_score(self, transcription: str, reference: str) -> float:
    """
    Calculate similarity score between transcription and reference.

    Args:
        transcription: Transcribed text
        reference: Reference subtitle text

    Returns:
        Float score between 0.0 and 1.0
    """
    # Default weighting: token-sort similarity dominates (70%), with a
    # partial-match component (30%) tolerating extra surrounding text.
    token_weight = 0.7
    partial_weight = 0.3

    combined = (
        fuzz.token_sort_ratio(transcription, reference) * token_weight
        + fuzz.partial_ratio(transcription, reference) * partial_weight
    )
    # rapidfuzz-style ratios are 0-100; normalize to 0.0-1.0.
    return combined / 100.0
unload
unload()

Unload model to free memory.

Source code in mkv_episode_matcher/asr_models.py
def unload(self):
    """Unload model to free memory."""
    # Drop the reference so the underlying model object can be garbage-collected.
    self._model = None

ParakeetTDTModel

ParakeetTDTModel(
    model_name="nvidia/parakeet-tdt-0.6b-v2", device=None
)

Bases: ASRModel

NVIDIA Parakeet TDT ASR model implementation.

WARNING: This model (TDT) uses the Transducer decoder which requires significant GPU resources and may be unstable on some Windows configurations (CUDA errors).

Initialize Parakeet TDT model.

PARAMETER DESCRIPTION
model_name

Parakeet model identifier from HuggingFace

TYPE: str DEFAULT: 'nvidia/parakeet-tdt-0.6b-v2'

device

Device to run on

TYPE: str | None DEFAULT: None

Source code in mkv_episode_matcher/asr_models.py
def __init__(
    self, model_name: str = "nvidia/parakeet-tdt-0.6b-v2", device: str | None = None
):
    """
    Initialize Parakeet TDT model.

    Args:
        model_name: Parakeet model identifier from HuggingFace
        device: Device to run on
    """
    # All state setup lives in ASRModel.__init__; only the defaults differ here.
    super().__init__(model_name, device)
Attributes
is_loaded property
is_loaded

Check if model is loaded.

Functions
load
load()

Load Parakeet model with caching.

Source code in mkv_episode_matcher/asr_models.py
def load(self):
    """Load Parakeet model with caching.

    Idempotent: returns immediately if the model is already loaded or a
    matching instance exists in the module cache. Temporarily mutates
    os.environ while importing/loading NeMo and restores it afterwards.

    Raises:
        ImportError: If NVIDIA NeMo is not installed.
    """
    if self.is_loaded:
        return

    cache_key = f"parakeet_tdt_{self.model_name}_{self.device}"

    # Reuse a previously loaded model for the same checkpoint/device combo.
    if cache_key in _model_cache:
        self._model = _model_cache[cache_key]
        logger.debug(
            f"Using cached Parakeet TDT model: {self.model_name} on {self.device}"
        )
        return

    try:
        # Windows compatibility: Patch signal module before importing NeMo
        if os.name == "nt":  # Windows
            import signal

            if not hasattr(signal, "SIGKILL"):
                # Add missing signal constants for Windows compatibility
                # (NeMo references POSIX-only signals at import time).
                signal.SIGKILL = 9
                signal.SIGTERM = 15

        import nemo.collections.asr as nemo_asr

        # Store original environment variables for restoration
        original_env = {}

        # Configure environment to suppress NeMo warnings and optimize performance
        nemo_env_settings = {
            "NEMO_DISABLE_TRAINING_LOGS": "1",
            "NEMO_DISABLE_HYDRA_LOGS": "1",
            "HYDRA_FULL_ERROR": "0",
            "PYTHONWARNINGS": "ignore::UserWarning",
            "TOKENIZERS_PARALLELISM": "false",  # Avoid tokenizer warnings
        }

        # Windows compatibility: Add optimizations but avoid signal issues
        if os.name == "nt":  # Windows
            nemo_env_settings.update({
                "OMP_NUM_THREADS": "1",
                "MKL_NUM_THREADS": "1",
                "NEMO_BYPASS_SIGNALS": "1",  # Bypass NeMo signal handling on Windows
            })

        # Record prior values (None = was unset) before overwriting.
        for key, value in nemo_env_settings.items():
            original_env[key] = os.environ.get(key)
            os.environ[key] = value

        try:
            # Set device for NeMo
            if self.device == "cuda" and torch.cuda.is_available():
                # NeMo will automatically use CUDA if available
                pass
            elif self.device == "cpu":
                # Force CPU usage - NeMo respects CUDA_VISIBLE_DEVICES=""
                original_env["CUDA_VISIBLE_DEVICES"] = os.environ.get(
                    "CUDA_VISIBLE_DEVICES"
                )
                os.environ["CUDA_VISIBLE_DEVICES"] = ""

            # Load model with reduced verbosity
            self._model = nemo_asr.models.ASRModel.from_pretrained(
                model_name=self.model_name,
                strict=False,  # Allow loading with missing keys to reduce warnings
            )

            # Configure model for optimal inference
            if hasattr(self._model, "set_batch_size"):
                self._model.set_batch_size(1)  # Optimize for single file processing

            # Fix for Windows: Force num_workers to 0 to avoid multiprocessing errors/locks
            if hasattr(self._model, "cfg"):
                for ds_config in ["test_ds", "validation_ds"]:
                    if ds_config in self._model.cfg:
                        self._model.cfg[ds_config].num_workers = 0

            if hasattr(self._model, "eval"):
                self._model.eval()  # Set to evaluation mode

        finally:
            # Restore original environment variables
            # (None recorded above means the variable must be removed again).
            for key, original_value in original_env.items():
                if original_value is not None:
                    os.environ[key] = original_value
                elif key in os.environ:
                    del os.environ[key]

        _model_cache[cache_key] = self._model
        logger.info(
            f"Loaded Parakeet TDT model: {self.model_name} on {self.device}"
        )

    except ImportError as e:
        raise ImportError(
            "NVIDIA NeMo not installed. Run: pip install nemo_toolkit[asr]"
        ) from e
    except Exception as e:
        logger.error(f"Failed to load Parakeet TDT model {self.model_name}: {e}")
        raise
calculate_match_score
calculate_match_score(transcription, reference)

Calculate similarity score with Parakeet-specific weights. Parakeet produces longer, more detailed transcriptions, so we favor partial matches.

Source code in mkv_episode_matcher/asr_models.py
def calculate_match_score(self, transcription: str, reference: str) -> float:
    """
    Calculate similarity score with Parakeet-specific weights.
    Parakeet produces longer, more detailed transcriptions, so we favor partial matches.
    """
    # Base Parakeet weighting: partial matches dominate (60/40 split).
    token_weight, partial_weight = 0.4, 0.6

    # When the transcription dwarfs the reference, lean even harder on the
    # partial ratio (80/20), since token-sort is penalized by extra length.
    length_ratio = len(transcription) / max(len(reference), 1)
    if length_ratio > 2.0:  # Much longer transcription
        token_weight, partial_weight = 0.2, 0.8

    combined = (
        fuzz.token_sort_ratio(transcription, reference) * token_weight
        + fuzz.partial_ratio(transcription, reference) * partial_weight
    )
    # Ratios are 0-100; normalize to the 0.0-1.0 contract of the base class.
    return combined / 100.0
transcribe
transcribe(audio_path)

Transcribe audio using Parakeet with preprocessing and text normalization.

PARAMETER DESCRIPTION
audio_path

Path to audio file

TYPE: str | Path

RETURNS DESCRIPTION
dict

Dictionary with 'text' and 'segments' from Parakeet

Source code in mkv_episode_matcher/asr_models.py
def transcribe(self, audio_path: str | Path) -> dict:
    """
    Transcribe audio using Parakeet with preprocessing and text normalization.

    Never raises on transcription failure: errors are logged and an empty
    result dict is returned so callers can fall back to another model.

    Args:
        audio_path: Path to audio file

    Returns:
        Dictionary with 'text' and 'segments' from Parakeet
    """
    if not self.is_loaded:
        self.load()

    preprocessed_audio = None
    try:
        logger.debug(f"Starting Parakeet transcription for {audio_path}")

        # Preprocess audio for optimal Parakeet performance
        # NOTE(review): _preprocess_audio is defined elsewhere; it appears to
        # return a temp-file path (or the original path) — confirm.
        preprocessed_audio = self._preprocess_audio(audio_path)

        # Configure NeMo model settings to reduce warnings
        old_env_vars = {}
        try:
            # Set environment variables to reduce NeMo warnings
            env_settings = {
                "CUDA_LAUNCH_BLOCKING": "0",
                "NEMO_DISABLE_TRAINING_LOGS": "1",
            }

            # Record prior values (None = unset) so they can be restored below.
            for key, value in env_settings.items():
                old_env_vars[key] = os.environ.get(key)
                os.environ[key] = value

            # Parakeet expects list of file paths
            result = self._model.transcribe([preprocessed_audio])

        finally:
            # Restore original environment variables
            for key, old_value in old_env_vars.items():
                if old_value is not None:
                    os.environ[key] = old_value
                elif key in os.environ:
                    del os.environ[key]

        logger.debug(f"Parakeet raw result: {result}, type: {type(result)}")

        # Extract text from result
        # (NeMo versions differ: items may be hypothesis objects with a
        # .text attribute, or plain strings.)
        raw_text = ""
        if isinstance(result, list) and len(result) > 0:
            if hasattr(result[0], "text"):
                raw_text = result[0].text
            elif isinstance(result[0], str):
                raw_text = result[0]
            else:
                raw_text = str(result[0])
        else:
            logger.warning(f"Unexpected Parakeet result format: {result}")
            raw_text = ""

        # Clean and normalize the transcription
        cleaned_text = self._clean_transcription_text(raw_text)

        logger.debug(f"Raw transcription: '{raw_text}'")
        logger.debug(f"Cleaned transcription: '{cleaned_text}'")

        return {
            "text": cleaned_text,
            "raw_text": raw_text,
            "segments": [],
            "language": "en",
        }

    except Exception as e:
        logger.error(
            f"Parakeet transcription failed for {audio_path}: {type(e).__name__}: {e}"
        )
        import traceback

        traceback.print_exc()
        # Return empty result instead of raising to allow fallback
        return {"text": "", "raw_text": "", "segments": [], "language": "en"}
    finally:
        # Clean up preprocessed audio file
        # (only when preprocessing produced a distinct temp file).
        if preprocessed_audio and preprocessed_audio != str(audio_path):
            try:
                Path(preprocessed_audio).unlink(missing_ok=True)
            except Exception as e:
                logger.debug(f"Failed to clean up preprocessed audio: {e}")
unload
unload()

Unload model to free memory.

Source code in mkv_episode_matcher/asr_models.py
def unload(self):
    """Unload model to free memory."""
    # Drop the reference so the underlying model object can be garbage-collected.
    self._model = None

ParakeetCTCModel

ParakeetCTCModel(
    model_name="nvidia/parakeet-ctc-0.6b", device=None
)

Bases: ParakeetTDTModel

NVIDIA Parakeet CTC ASR model implementation.

This uses the CTC decoder which is more stable and robust on various hardware than the TDT version, though potentially slightly less accurate.

Initialize Parakeet CTC model.

PARAMETER DESCRIPTION
model_name

Parakeet model identifier (default: nvidia/parakeet-ctc-0.6b)

TYPE: str DEFAULT: 'nvidia/parakeet-ctc-0.6b'

device

Device to run on

TYPE: str | None DEFAULT: None

Source code in mkv_episode_matcher/asr_models.py
def __init__(
    self, model_name: str = "nvidia/parakeet-ctc-0.6b", device: str | None = None
):
    """
    Initialize Parakeet CTC model.

    Args:
        model_name: Parakeet model identifier (default: nvidia/parakeet-ctc-0.6b)
        device: Device to run on
    """
    # Only the default checkpoint differs from the TDT parent; if the caller
    # supplies a model_name we trust it, even a non-CTC one.
    super().__init__(model_name, device)
Attributes
is_loaded property
is_loaded

Check if model is loaded.

Functions
load
load()

Load Parakeet CTC model with caching.

Source code in mkv_episode_matcher/asr_models.py
def load(self):
    """Load Parakeet CTC model with caching."""
    # Delegate to ParakeetTDTModel.load: its cache key already includes
    # self.model_name, so CTC and TDT checkpoints never collide in the cache.
    # NOTE(review): the parent logs "Parakeet TDT model" even when a CTC
    # checkpoint is loaded — consider a CTC-specific log message.
    super().load()
transcribe
transcribe(audio_path)

Transcribe audio using Parakeet with preprocessing and text normalization.

PARAMETER DESCRIPTION
audio_path

Path to audio file

TYPE: str | Path

RETURNS DESCRIPTION
dict

Dictionary with 'text' and 'segments' from Parakeet

Source code in mkv_episode_matcher/asr_models.py
def transcribe(self, audio_path: str | Path) -> dict:
    """
    Transcribe audio using Parakeet with preprocessing and text normalization.

    Args:
        audio_path: Path to audio file

    Returns:
        Dictionary with 'text' and 'segments' from Parakeet
    """
    # This override previously duplicated ParakeetTDTModel.transcribe line
    # for line; delegate to the inherited implementation to keep one copy.
    return super().transcribe(audio_path)
calculate_match_score
calculate_match_score(transcription, reference)

Calculate similarity score with Parakeet-specific weights. Parakeet produces longer, more detailed transcriptions, so we favor partial matches.

Source code in mkv_episode_matcher/asr_models.py
def calculate_match_score(self, transcription: str, reference: str) -> float:
    """
    Calculate similarity score with Parakeet-specific weights.
    Parakeet produces longer, more detailed transcriptions, so we favor partial matches.
    """
    # This override previously duplicated ParakeetTDTModel.calculate_match_score
    # verbatim; delegate to the inherited implementation to keep one copy.
    return super().calculate_match_score(transcription, reference)
unload
unload()

Unload model to free memory.

Source code in mkv_episode_matcher/asr_models.py
def unload(self):
    """Unload model to free memory."""
    # Drop the reference so the underlying model object can be garbage-collected.
    self._model = None

Functions

create_asr_model

create_asr_model(model_config)

Factory function to create ASR models from configuration.

PARAMETER DESCRIPTION
model_config

Dictionary with 'type' and 'name' keys

TYPE: dict

RETURNS DESCRIPTION
ASRModel

Configured ASRModel instance

Example

model_config = {"type": "parakeet", "name": "nvidia/parakeet-ctc-0.6b"}
model = create_asr_model(model_config)

Source code in mkv_episode_matcher/asr_models.py
def create_asr_model(model_config: dict) -> ASRModel:
    """
    Factory function to create ASR models from configuration.

    Args:
        model_config: Dictionary with 'type' and 'name' keys

    Returns:
        Configured ASRModel instance

    Raises:
        ValueError: If the configured model type is not supported.

    Example:
        model_config = {"type": "parakeet", "name": "nvidia/parakeet-ctc-0.6b"}
        model = create_asr_model(model_config)
    """
    model_type = model_config.get("type", "").lower()
    model_name = model_config.get("name", "")
    device = model_config.get("device")

    # Guard clause: only Parakeet is supported at the moment.
    if model_type != "parakeet":
        raise ValueError(
            f"Unsupported model type: {model_type}. Only 'parakeet' is supported."
        )

    # Fall back to the known-good CTC checkpoint when no name is configured.
    return ParakeetCTCModel(model_name or "nvidia/parakeet-ctc-0.6b", device)

get_cached_model

get_cached_model(model_config)

Get a cached model instance, creating it if necessary.

PARAMETER DESCRIPTION
model_config

Dictionary with model configuration

TYPE: dict

RETURNS DESCRIPTION
ASRModel

ASRModel instance (loaded and ready for use)

Source code in mkv_episode_matcher/asr_models.py
def get_cached_model(model_config: dict) -> ASRModel:
    """
    Get a cached model instance, creating it if necessary.

    Args:
        model_config: Dictionary with model configuration

    Returns:
        ASRModel instance (loaded and ready for use)
    """
    # Key on type + name + device so distinct configurations never collide.
    model_type = model_config.get("type", "")
    model_name = model_config.get("name", "")
    model_device = model_config.get("device", "auto")
    cache_key = f"{model_type}_{model_name}_{model_device}"

    try:
        return _model_cache[cache_key]
    except KeyError:
        model = create_asr_model(model_config)
        model.load()  # Load immediately for caching
        _model_cache[cache_key] = model
        return model

clear_model_cache

clear_model_cache()

Clear all cached models to free memory.

Source code in mkv_episode_matcher/asr_models.py
def clear_model_cache():
    """Clear all cached models to free memory."""
    global _model_cache
    # Give each cached model a chance to release its resources before the
    # cache drops the last reference.
    for cached in list(_model_cache.values()):
        if hasattr(cached, "unload"):
            cached.unload()
    _model_cache.clear()
    logger.info("Cleared ASR model cache")

list_available_models

list_available_models()

List available model types and their requirements.

RETURNS DESCRIPTION
dict

Dictionary with model types and their availability status

Source code in mkv_episode_matcher/asr_models.py
def list_available_models() -> dict:
    """
    List available model types and their requirements.

    Returns:
        Dictionary with model types and their availability status
    """
    # Probe for NVIDIA NeMo; Parakeet support depends on it being importable.
    try:
        import nemo.collections.asr  # noqa: F401
    except ImportError:
        parakeet_status = {
            "available": False,
            "error": "NVIDIA NeMo not installed",
        }
    else:
        parakeet_status = {
            "available": True,
            "models": ["nvidia/parakeet-ctc-0.6b"],
        }

    return {"parakeet": parakeet_status}

mkv_episode_matcher.subtitle_utils

Functions

generate_subtitle_patterns

generate_subtitle_patterns(series_name, season, episode)

Generate various common subtitle filename patterns.

PARAMETER DESCRIPTION
series_name

Name of the series

TYPE: str

season

Season number

TYPE: int

episode

Episode number

TYPE: int

RETURNS DESCRIPTION
list[str]

List[str]: List of possible subtitle filenames

Source code in mkv_episode_matcher/subtitle_utils.py
def generate_subtitle_patterns(
    series_name: str, season: int, episode: int
) -> list[str]:
    """
    Generate various common subtitle filename patterns.

    Args:
        series_name (str): Name of the series
        season (int): Season number
        episode (int): Episode number

    Returns:
        List[str]: List of possible subtitle filenames
    """
    compact = series_name.replace(" ", "")
    dotted = series_name.replace(" ", ".")
    underscored = series_name.replace(" ", "_")
    se_tag = f"S{season:02d}E{episode:02d}"

    return [
        # Standard format: "Show Name - S01E02.srt"
        f"{series_name} - {se_tag}.srt",
        # Season x Episode format: "Show Name - 1x02.srt"
        f"{series_name} - {season}x{episode:02d}.srt",
        # Separate season/episode: "Show Name - Season 1 Episode 02.srt"
        f"{series_name} - Season {season} Episode {episode:02d}.srt",
        # Compact format: "ShowName.S01E02.srt"
        f"{compact}.{se_tag}.srt",
        # Numbered format: "Show Name 102.srt"
        f"{series_name} {season:01d}{episode:02d}.srt",
        # Dot format: "Show.Name.1x02.srt"
        f"{dotted}.{season}x{episode:02d}.srt",
        # Underscore format: "Show_Name_S01E02.srt"
        f"{underscored}_{se_tag}.srt",
    ]

find_existing_subtitle

find_existing_subtitle(
    series_cache_dir, series_name, season, episode
)

Check for existing subtitle files in various naming formats.

PARAMETER DESCRIPTION
series_cache_dir

Directory containing subtitle files

TYPE: str

series_name

Name of the series

TYPE: str

season

Season number

TYPE: int

episode

Episode number

TYPE: int

RETURNS DESCRIPTION
str | None

Optional[str]: Path to existing subtitle file if found, None otherwise

Source code in mkv_episode_matcher/subtitle_utils.py
def find_existing_subtitle(
    series_cache_dir: str, series_name: str, season: int, episode: int
) -> str | None:
    """
    Check for existing subtitle files in various naming formats.

    Args:
        series_cache_dir (str): Directory containing subtitle files
        series_name (str): Name of the series
        season (int): Season number
        episode (int): Episode number

    Returns:
        Optional[str]: Path to existing subtitle file if found, None otherwise
    """
    patterns = generate_subtitle_patterns(series_name, season, episode)

    for pattern in patterns:
        filepath = Path(series_cache_dir) / pattern
        if filepath.exists():
            # Return a plain string to honour the declared `str | None`
            # return type (previously a Path object leaked out here).
            return str(filepath)

    return None

sanitize_filename

sanitize_filename(filename)

Sanitize filename by removing/replacing invalid characters.

PARAMETER DESCRIPTION
filename

Original filename

TYPE: str

RETURNS DESCRIPTION
str

Sanitized filename

TYPE: str

Source code in mkv_episode_matcher/subtitle_utils.py
def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename by removing/replacing invalid characters.

    Args:
        filename (str): Original filename

    Returns:
        str: Sanitized filename
    """
    # Substitute characters that commonly appear in titles with readable
    # stand-ins rather than deleting them outright.
    replacements = {":": " -", "/": "-", "\\": "-"}
    for bad, good in replacements.items():
        filename = filename.replace(bad, good)

    # Strip anything else that is illegal in filenames on common platforms.
    filename = re.sub(r'[<>:"/\\|?*]', "", filename)

    return filename.strip()

TMDB Client

mkv_episode_matcher.tmdb_client

Classes

RateLimitedRequest

RateLimitedRequest(rate_limit=30, period=1)

A class that represents a rate-limited request object.

ATTRIBUTE DESCRIPTION
rate_limit

Maximum number of requests allowed per period.

TYPE: int

period

Period in seconds.

TYPE: int

requests_made

Counter for requests made.

TYPE: int

start_time

Start time of the current period.

TYPE: float

lock

Lock for synchronization.

TYPE: Lock

Source code in mkv_episode_matcher/tmdb_client.py
def __init__(self, rate_limit=30, period=1):
    """Initialize the rate limiter.

    Args:
        rate_limit (int): Maximum number of requests allowed per period.
        period (int): Window length in seconds.
    """
    self.rate_limit = rate_limit
    self.period = period
    # Requests issued in the current window.
    self.requests_made = 0
    # Wall-clock start of the current window.
    self.start_time = time.time()
    # Guards the counters above across threads.
    self.lock = Lock()
Functions
get
get(url)

Sends a rate-limited GET request to the specified URL.

PARAMETER DESCRIPTION
url

The URL to send the request to.

TYPE: str

RETURNS DESCRIPTION
Response

The response object returned by the request.

Source code in mkv_episode_matcher/tmdb_client.py
def get(self, url):
    """
    Sends a rate-limited GET request to the specified URL.

    Args:
        url (str): The URL to send the request to.

    Returns:
        Response: The response object returned by the request.
    """
    with self.lock:
        # Quota exhausted: wait out the remainder of the window, then
        # start a fresh one.
        if self.requests_made >= self.rate_limit:
            remaining = self.period - (time.time() - self.start_time)
            if remaining > 0:
                time.sleep(remaining)
            self.requests_made = 0
            self.start_time = time.time()

        self.requests_made += 1

    # Perform the HTTP call outside the lock so slow responses do not
    # serialize other threads.
    return requests.get(url, timeout=30)

Functions

retry_network_operation

retry_network_operation(max_retries=3, base_delay=1.0)

Decorator for retrying network operations.

Source code in mkv_episode_matcher/tmdb_client.py
def retry_network_operation(
    max_retries: int = 3, base_delay: float = 1.0
) -> Callable[[F], F]:
    """Decorator for retrying network operations.

    Retries on requests/connection/timeout errors with exponential backoff
    (doubling from base_delay, capped at 30 seconds). After max_retries
    failed retries the last exception is re-raised.
    """

    def decorator(func: F) -> F:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            delay = base_delay
            attempt = 0
            while True:
                try:
                    return func(*args, **kwargs)
                except (requests.RequestException, ConnectionError, TimeoutError) as e:
                    # Final attempt failed: surface the error to the caller.
                    if attempt == max_retries:
                        logger.error(
                            f"Max retries ({max_retries}) exceeded for {func.__name__}: {e}"
                        )
                        raise e

                    logger.warning(
                        f"Network retry {attempt + 1}/{max_retries + 1} for {func.__name__}: {e}"
                    )
                    time.sleep(delay)
                    delay = min(delay * 2, 30)  # Cap at 30 seconds
                    attempt += 1

        return wrapper  # type: ignore

    return decorator

fetch_show_id

fetch_show_id(show_name)

Fetch the TMDb ID for a given show name.

PARAMETER DESCRIPTION
show_name

The name of the show.

TYPE: str

RETURNS DESCRIPTION
str

The TMDb ID of the show, or None if not found.

TYPE: str | None

Source code in mkv_episode_matcher/tmdb_client.py
@retry_network_operation(max_retries=3, base_delay=1.0)
def fetch_show_id(show_name: str) -> str | None:
    """
    Fetch the TMDb ID for a given show name.

    Args:
        show_name (str): The name of the show.

    Returns:
        str: The TMDb ID of the show, or None if not found.
    """
    config = get_config_manager().load()
    tmdb_api_key = config.tmdb_api_key
    url = "https://api.themoviedb.org/3/search/tv"
    # Pass the query via `params` so requests URL-encodes show names containing
    # spaces/&/etc. (previously interpolated raw into the URL), and add a
    # timeout for consistency with the other TMDB calls in this module.
    response = requests.get(
        url,
        params={"query": show_name, "api_key": tmdb_api_key},
        timeout=30,
    )
    if response.status_code == 200:
        # Return the first search result's ID; TMDB orders by relevance.
        results = response.json().get("results", [])
        if results:
            return str(results[0]["id"])
    return None

fetch_show_details

fetch_show_details(show_id)

Fetch show details from TMDB by ID.

PARAMETER DESCRIPTION
show_id

The TMDB show ID

TYPE: int

RETURNS DESCRIPTION
dict

Show details including 'name', 'number_of_seasons', etc.

TYPE: dict | None

None

If request fails or API key not configured

TYPE: dict | None

Source code in mkv_episode_matcher/tmdb_client.py
@retry_network_operation(max_retries=3, base_delay=1.0)
def fetch_show_details(show_id: int) -> dict | None:
    """
    Fetch show details from TMDB by ID.

    Args:
        show_id: The TMDB show ID

    Returns:
        dict: Show details including 'name', 'number_of_seasons', etc.
        None: If request fails or API key not configured
    """
    config = get_config_manager().load()
    api_key = config.tmdb_api_key
    if not api_key:
        logger.warning("TMDB API key not configured")
        return None

    endpoint = f"https://api.themoviedb.org/3/tv/{show_id}?api_key={api_key}"

    try:
        # Timeout guards against a stalled connection hanging the caller.
        response = requests.get(endpoint, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to fetch show details for ID {show_id}: {e}")
        return None

fetch_season_details

fetch_season_details(show_id, season_number)

Fetch the total number of episodes for a given show and season from the TMDb API.

PARAMETER DESCRIPTION
show_id

The ID of the show on TMDb.

TYPE: str

season_number

The season number to fetch details for.

TYPE: int

RETURNS DESCRIPTION
int

The total number of episodes in the season, or 0 if the API request failed.

TYPE: int

Source code in mkv_episode_matcher/tmdb_client.py
@retry_network_operation(max_retries=3, base_delay=1.0)
def fetch_season_details(show_id: str, season_number: int) -> int:
    """
    Fetch the total number of episodes for a given show and season from the TMDb API.

    Args:
        show_id (str): The ID of the show on TMDb.
        season_number (int): The season number to fetch details for.

    Returns:
        int: The total number of episodes in the season, or 0 if the API request failed.
    """
    logger.info(f"Fetching season details for Season {season_number}...")
    config = get_config_manager().load()
    tmdb_api_key = config.tmdb_api_key
    url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_number}?api_key={tmdb_api_key}"
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        season_data = response.json()
        # `.get("episodes", [])` tolerates a missing key, so no KeyError can
        # occur here — the previous `except KeyError` clause was unreachable
        # and has been removed.
        return len(season_data.get("episodes", []))
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to fetch season details for Season {season_number}: {e}")
        return 0

get_number_of_seasons

get_number_of_seasons(show_id)

Retrieves the number of seasons for a given TV show from the TMDB API.

Parameters: - show_id (int): The ID of the TV show.

Returns: - num_seasons (int): The number of seasons for the TV show.

Raises: - requests.HTTPError: If there is an error while making the API request.

Source code in mkv_episode_matcher/tmdb_client.py
@retry_network_operation(max_retries=3, base_delay=1.0)
def get_number_of_seasons(show_id: str) -> int:
    """
    Retrieves the number of seasons for a given TV show from the TMDB API.

    Parameters:
    - show_id (int): The ID of the TV show.

    Returns:
    - num_seasons (int): The number of seasons for the TV show.

    Raises:
    - requests.HTTPError: If there is an error while making the API request.
    """
    config = get_config_manager().load()
    api_key = config.tmdb_api_key
    # raise_for_status surfaces HTTP errors to the retry decorator / caller.
    response = requests.get(
        f"https://api.themoviedb.org/3/tv/{show_id}?api_key={api_key}", timeout=30
    )
    response.raise_for_status()
    num_seasons = response.json().get("number_of_seasons", 0)
    logger.info(f"Found {num_seasons} seasons")
    return num_seasons

Utilities

mkv_episode_matcher.utils

Functions

normalize_path

normalize_path(path_str)

Normalize a path string to handle cross-platform path issues. Properly handles trailing slashes and backslashes in both Windows and Unix paths. Also strips surrounding quotes that might be present in command line arguments.

PARAMETER DESCRIPTION
path_str

The path string to normalize

TYPE: str

RETURNS DESCRIPTION

pathlib.Path: A normalized Path object

Source code in mkv_episode_matcher/utils.py
def normalize_path(path_str):
    """
    Normalize a path string into a ``pathlib.Path``.

    Handles trailing slashes and backslashes for both Windows and Unix style
    paths, and strips surrounding quotes that may be present in command-line
    arguments.

    Args:
        path_str (str): The path string to normalize

    Returns:
        pathlib.Path: A normalized Path object
    """
    # Accept Path objects as well as plain strings.
    text = str(path_str) if isinstance(path_str, Path) else path_str

    # Drop surrounding whitespace, then any surrounding double/single quotes.
    text = text.strip().strip('"').strip("'")

    # Trim trailing separators of either flavor.
    text = text.rstrip("/").rstrip("\\")

    # A drive-letter Windows path seen on a non-Windows platform cannot
    # resolve here, so keep only its final component.
    if os.name != "nt" and "\\" in text and ":" in text[:2]:
        return Path(text.split("\\")[-1])

    return Path(text)

get_valid_seasons

get_valid_seasons(show_dir)

Get all season directories that contain MKV files.

PARAMETER DESCRIPTION
show_dir

Base directory for the TV show

TYPE: str

RETURNS DESCRIPTION
list

List of paths to valid season directories

Source code in mkv_episode_matcher/utils.py
def get_valid_seasons(show_dir):
    """
    Get all season directories that contain MKV files.

    Args:
        show_dir (str): Base directory for the TV show

    Returns:
        list: List of paths (str) to valid season directories
    """
    show_path = normalize_path(show_dir)

    # Keep only immediate subdirectories containing at least one .mkv file.
    # The extension check is case-insensitive (e.g. ".MKV"), consistent with
    # how .srt files are detected elsewhere in this module.
    valid_season_paths = []
    for entry in show_path.iterdir():
        if not entry.is_dir():
            continue
        if any(f.name.lower().endswith(".mkv") for f in entry.iterdir()):
            valid_season_paths.append(str(show_path / entry.name))

    if not valid_season_paths:
        logger.warning(
            f"No seasons with .mkv files found in show '{show_path.name}'"
        )
    else:
        logger.info(
            f"Found {len(valid_season_paths)} seasons with .mkv files in '{show_path.name}'"
        )

    return valid_season_paths

check_filename

check_filename(filename)

Check if the filename is in the correct format (S01E02).

PARAMETER DESCRIPTION
filename

The filename to check.

TYPE: str or Path

RETURNS DESCRIPTION
bool

True if the filename matches the expected pattern.

Source code in mkv_episode_matcher/utils.py
def check_filename(filename):
    """
    Check if the filename is in the correct format (S01E02).

    Args:
        filename (str or Path): The filename to check.

    Returns:
        bool: True if the filename matches the expected pattern.
    """
    # Path objects are matched against their string form.
    name = str(filename) if isinstance(filename, Path) else filename
    # An uppercase SxxEyy token anywhere in the name counts as a match.
    return re.search(r".*S\d+E\d+", name) is not None

scramble_filename

scramble_filename(original_file_path, file_number)

Scrambles the filename of the given file path by adding the series title and file number.

PARAMETER DESCRIPTION
original_file_path

The original file path.

TYPE: str

file_number

The file number to be added to the filename.

TYPE: int

RETURNS DESCRIPTION

None

Source code in mkv_episode_matcher/utils.py
def scramble_filename(original_file_path, file_number):
    """
    Scrambles the filename of the given file path by adding the series title and file number.

    Args:
        original_file_path (str): The original file path.
        file_number (int): The file number to be added to the filename.

    Returns:
        None
    """
    logger.info(f"Scrambling {original_file_path}")
    source = Path(original_file_path)
    # The series title is taken from the grandparent directory name
    # (show dir / season dir / file layout).
    series_title = normalize_path(original_file_path).parent.parent.name
    new_file_name = f"{series_title} - {file_number:03d}{source.suffix}"
    target = source.parent / new_file_name
    # Never overwrite an existing file; silently skip in that case.
    if not target.exists():
        logger.info(f"Renaming {source.name} -> {new_file_name}")
        source.rename(target)

rename_episode_file

rename_episode_file(original_file_path, new_filename)

Rename an episode file with a standardized naming convention.

PARAMETER DESCRIPTION
original_file_path

The original file path of the episode.

TYPE: str or Path

new_filename

The new filename including season/episode info.

TYPE: str or Path

RETURNS DESCRIPTION
Path

Path to the renamed file, or None if rename failed.

Source code in mkv_episode_matcher/utils.py
def rename_episode_file(original_file_path, new_filename):
    """
    Rename an episode file with a standardized naming convention.

    Args:
        original_file_path (str or Path): The original file path of the episode.
        new_filename (str or Path): The new filename including season/episode info.

    Returns:
        Path: Path to the renamed file, or None if rename failed.
    """
    original_dir = Path(original_file_path).parent
    new_file_path = original_dir / new_filename

    # If the target name is taken, append an incrementing numeric suffix
    # (e.g. "name_2.mkv") until a free name is found.
    if new_file_path.exists():
        logger.warning(f"File already exists: {new_filename}")

        base, ext = Path(new_filename).stem, Path(new_filename).suffix
        suffix = 2
        while True:
            new_filename = f"{base}_{suffix}{ext}"
            new_file_path = original_dir / new_filename
            if not new_file_path.exists():
                break
            suffix += 1

    try:
        Path(original_file_path).rename(new_file_path)
        logger.info(f"Renamed {Path(original_file_path).name} -> {new_filename}")
        return new_file_path
    except OSError as e:
        # FileExistsError is a subclass of OSError, so this single handler
        # covers both; the previous separate `except FileExistsError` clause
        # was unreachable and has been removed.
        logger.error(f"Failed to rename file: {e}")
        return None

get_subtitles

get_subtitles(show_id, seasons, config=None, max_retries=3)

Retrieves and saves subtitles for a given TV show and seasons.

PARAMETER DESCRIPTION
show_id

The ID of the TV show.

TYPE: int

seasons

A set of season numbers for which subtitles should be retrieved.

TYPE: Set[int]

config

Preloaded configuration.

TYPE: Config object DEFAULT: None

max_retries

Number of times to retry subtitle download on OpenSubtitlesException. Defaults to 3.

TYPE: int DEFAULT: 3

Source code in mkv_episode_matcher/utils.py
def get_subtitles(show_id, seasons: set[int], config=None, max_retries=3):
    """
    Retrieves and saves subtitles for a given TV show and seasons.

    Args:
        show_id (int): The ID of the TV show.
        seasons (Set[int]): A set of season numbers for which subtitles should be retrieved.
        config (Config object, optional): Preloaded configuration.
        max_retries (int, optional): Number of times to retry subtitle download on OpenSubtitlesException. Defaults to 3.
    """
    if config is None:
        config = get_config_manager().load()
    show_dir = config.show_dir
    series_name = sanitize_filename(normalize_path(show_dir).name)
    tmdb_api_key = config.tmdb_api_key
    open_subtitles_api_key = config.open_subtitles_api_key
    open_subtitles_user_agent = config.open_subtitles_user_agent
    open_subtitles_username = config.open_subtitles_username
    open_subtitles_password = config.open_subtitles_password

    # Every credential/setting is required before any network work starts.
    if not all([
        show_dir,
        tmdb_api_key,
        open_subtitles_api_key,
        open_subtitles_user_agent,
        open_subtitles_username,
        open_subtitles_password,
    ]):
        logger.error("Missing configuration settings. Please run the setup script.")
        return

    try:
        subtitles = OpenSubtitles(open_subtitles_user_agent, open_subtitles_api_key)
        subtitles.login(open_subtitles_username, open_subtitles_password)
    except Exception as e:
        logger.error(f"Failed to log in to OpenSubtitles: {e}")
        return

    for season in seasons:
        episodes = fetch_season_details(show_id, season)
        logger.info(f"Found {episodes} episodes in Season {season}")

        for episode in range(1, episodes + 1):
            logger.info(f"Processing Season {season}, Episode {episode}...")

            series_cache_dir = config.cache_dir / "data" / series_name
            os.makedirs(series_cache_dir, exist_ok=True)

            # Check for existing subtitle in any supported format
            existing_subtitle = find_existing_subtitle(
                series_cache_dir, series_name, season, episode
            )

            if existing_subtitle:
                logger.info(f"Subtitle already exists: {Path(existing_subtitle).name}")
                continue

            # Default to standard format for new downloads
            srt_filepath = str(
                series_cache_dir / f"{series_name} - S{season:02d}E{episode:02d}.srt"
            )

            # Get the episode info from TMDB. A timeout keeps a stalled
            # connection from hanging the entire run.
            url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}"
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            episode_data = response.json()
            episode_id = episode_data["id"]

            # search for the subtitle
            response = subtitles.search(tmdb_id=episode_id, languages="en")
            if len(response.data) == 0:
                logger.warning(
                    f"No subtitles found for {series_name} - S{season:02d}E{episode:02d}"
                )
                continue

            for subtitle in response.data:
                subtitle_dict = subtitle.to_dict()
                # Collapse runs of non-word characters to spaces and uppercase
                # the result so the episode tag can be matched reliably.
                # (Previously the pattern was r"\\W+", which matched a literal
                # backslash followed by "W" rather than non-word characters.)
                filename_clean = re.sub(
                    r"\W+", " ", subtitle_dict["file_name"]
                ).upper()
                if f"E{episode:02d}" in filename_clean:
                    logger.info(f"Original filename: {subtitle_dict['file_name']}")
                    retry_count = 0
                    while retry_count < max_retries:
                        try:
                            srt_file = subtitles.download_and_save(subtitle)
                            shutil.move(srt_file, srt_filepath)
                            logger.info(f"Subtitle saved to {srt_filepath}")
                            break
                        except OpenSubtitlesException as e:
                            retry_count += 1
                            logger.error(
                                f"OpenSubtitlesException (attempt {retry_count}): {e}"
                            )
                            console.print(
                                f"[red]OpenSubtitlesException (attempt {retry_count}): {e}[/red]"
                            )
                            if retry_count >= max_retries:
                                # Out of retries: let the user decide whether
                                # to abort the whole run or move on.
                                user_input = input(
                                    "Would you like to continue matching? (y/n): "
                                )
                                if user_input.strip().lower() != "y":
                                    logger.info(
                                        "User chose to stop matching due to the error."
                                    )
                                    return
                                else:
                                    logger.info(
                                        "User chose to continue matching despite the error."
                                    )
                                    break
                        except Exception as e:
                            logger.error(f"Failed to download and save subtitle: {e}")
                            console.print(
                                f"[red]Failed to download and save subtitle: {e}[/red]"
                            )
                            user_input = input(
                                "Would you like to continue matching despite the error? (y/n): "
                            )
                            if user_input.strip().lower() != "y":
                                logger.info(
                                    "User chose to stop matching due to the error."
                                )
                                return
                            else:
                                logger.info(
                                    "User chose to continue matching despite the error."
                                )
                                break
                    # while/else: retries exhausted without success or a
                    # user-approved break -> try the next candidate subtitle.
                    else:
                        continue
                    # A break above means we're done with this episode.
                    break

process_reference_srt_files

process_reference_srt_files(series_name)

Process reference SRT files for a given series.

PARAMETER DESCRIPTION
series_name

The name of the series.

TYPE: str

RETURNS DESCRIPTION
dict

A dictionary containing the reference files where the keys are the MKV filenames and the values are the corresponding SRT texts.

Source code in mkv_episode_matcher/utils.py
@logger.catch
def process_reference_srt_files(series_name):
    """
    Process reference SRT files for a given series.

    Args:
        series_name (str): The name of the series.

    Returns:
        dict: A dictionary containing the reference files where the keys are the MKV filenames
              and the values are the corresponding SRT texts.
    """
    config = get_config_manager().load()

    reference_files = {}
    reference_dir = config.cache_dir / "data" / series_name

    for dirpath, _, filenames in os.walk(reference_dir):
        for filename in filenames:
            if not filename.lower().endswith(".srt"):
                continue
            srt_file = Path(dirpath) / filename
            logger.info(f"Processing {srt_file}")
            season, episode = extract_season_episode(filename)
            # Skip files whose names don't encode a season/episode number:
            # formatting None with ":02" below would raise a TypeError.
            if season is None or episode is None:
                logger.warning(f"Skipping {srt_file}: cannot parse season/episode")
                continue
            srt_text = extract_srt_text(srt_file)
            mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
            reference_files[mkv_filename] = srt_text

    return reference_files

extract_srt_text

extract_srt_text(filepath)

Extracts text content from an SRT file.

PARAMETER DESCRIPTION
filepath

Path to the SRT file.

TYPE: str

RETURNS DESCRIPTION
list

List of text lines from the SRT file.

Source code in mkv_episode_matcher/utils.py
def extract_srt_text(filepath):
    """
    Extracts text content from an SRT file.

    Args:
        filepath (str): Path to the SRT file.

    Returns:
        list: List of text lines from the SRT file.
    """
    # SRT files are overwhelmingly UTF-8; pin the encoding so behavior does
    # not depend on the platform default (e.g. cp1252 on Windows), and
    # replace undecodable bytes instead of failing on the occasional
    # legacy-encoded file.
    with open(filepath, encoding="utf-8", errors="replace") as f:
        content = f.read()

    # Subtitle blocks are separated by blank lines.
    blocks = content.strip().split("\n\n")

    text_lines = []
    for block in blocks:
        lines = block.split("\n")
        # A well-formed block is: index, timestamp, then one or more text lines.
        if len(lines) < 3:
            continue

        # Skip index and timestamp, get all remaining lines as text
        text = " ".join(lines[2:])
        # Remove stage directions ([...]) and markup tags (<...>)
        text = re.sub(r"\[.*?\]|\<.*?\>", "", text)
        if text:
            text_lines.append(text)

    return text_lines

extract_season_episode

extract_season_episode(filename)

Extract season and episode numbers from filename with support for multiple formats.

PARAMETER DESCRIPTION
filename

Filename to parse

TYPE: str

RETURNS DESCRIPTION
tuple

(season_number, episode_number)

Source code in mkv_episode_matcher/utils.py
def extract_season_episode(filename):
    """
    Extract season and episode numbers from filename with support for multiple formats.

    Args:
        filename (str): Filename to parse

    Returns:
        tuple: (season_number, episode_number), or (None, None) if no
        recognized pattern is found.
    """
    # Recognized naming schemes, tried in order of specificity.
    recognized = (
        r"S(\d+)E(\d+)",  # S01E01
        r"(\d+)x(\d+)",  # 1x01 or 01x01
        r"Season\s*(\d+).*?(\d+)",  # Season 1 - 01
    )

    for pattern in recognized:
        found = re.search(pattern, filename, re.IGNORECASE)
        if found:
            season, episode = found.groups()
            return int(season), int(episode)

    return None, None

process_srt_files

process_srt_files(show_dir)

Process all SRT files in the given directory and its subdirectories.

PARAMETER DESCRIPTION
show_dir

The directory path where the SRT files are located.

TYPE: str

RETURNS DESCRIPTION
dict

A dictionary containing the SRT file paths as keys and their corresponding text content as values.

Source code in mkv_episode_matcher/utils.py
def process_srt_files(show_dir):
    """
    Process all SRT files in the given directory and its subdirectories.

    Args:
        show_dir (str): The directory path where the SRT files are located.

    Returns:
        dict: A dictionary containing the SRT file paths as keys and their corresponding text content as values.
    """
    srt_files = {}
    # Walk the whole tree; subtitle files may live in nested subdirectories.
    for dirpath, _, filenames in os.walk(show_dir):
        for name in filenames:
            if not name.lower().endswith(".srt"):
                continue
            srt_path = Path(dirpath) / name
            logger.info(f"Processing {srt_path}")
            srt_files[srt_path] = extract_srt_text(srt_path)
    return srt_files

compare_and_rename_files

compare_and_rename_files(
    srt_files, reference_files, dry_run=False
)

Compare the srt files with the reference files and rename the matching mkv files.

PARAMETER DESCRIPTION
srt_files

A dictionary containing the srt files as keys and their contents as values.

TYPE: dict

reference_files

A dictionary containing the reference files as keys and their contents as values.

TYPE: dict

dry_run

If True, the function will only log the renaming actions without actually renaming the files. Defaults to False.

TYPE: bool DEFAULT: False

Source code in mkv_episode_matcher/utils.py
def compare_and_rename_files(srt_files, reference_files, dry_run=False):
    """
    Compare the srt files with the reference files and rename the matching mkv files.

    Args:
        srt_files (dict): A dictionary containing the srt files as keys and their contents as values.
        reference_files (dict): A dictionary containing the reference files as keys and their contents as values.
        dry_run (bool, optional): If True, the function will only log the renaming actions without actually renaming the files. Defaults to False.
    """
    logger.info(
        f"Comparing {len(srt_files)} srt files with {len(reference_files)} reference files"
    )
    # NOTE(review): each srt file is compared against EVERY reference and the
    # loop does not stop at the first hit, so a file could be renamed more
    # than once if several references clear the threshold — confirm intended.
    for srt_text in srt_files.keys():
        # The target .mkv is expected two levels up from the .srt file
        # (presumably season dir / subs dir / file.srt — verify layout).
        parent_dir = Path(srt_text).parent.parent
        for reference in reference_files.keys():
            _season, _episode = extract_season_episode(reference)  # currently unused
            mkv_file = str(parent_dir / Path(srt_text).name.replace(".srt", ".mkv"))
            matching_lines = compare_text(
                reference_files[reference], srt_files[srt_text]
            )
            # Declare a match when the overlap reaches 10% of the reference's
            # line count.
            if matching_lines >= int(len(reference_files[reference]) * 0.1):
                logger.info(f"Matching lines: {matching_lines}")
                logger.info(f"Found matching file: {mkv_file} ->{reference}")
                new_filename = parent_dir / reference
                if not dry_run:
                    logger.info(f"Renaming {mkv_file} to {str(new_filename)}")
                    rename_episode_file(mkv_file, reference)

compare_text

compare_text(text1, text2)

Compare two lists of text lines and return the number of matching lines.

PARAMETER DESCRIPTION
text1

List of text lines from the first source.

TYPE: list

text2

List of text lines from the second source.

TYPE: list

RETURNS DESCRIPTION
int

Number of matching lines between the two sources.

Source code in mkv_episode_matcher/utils.py
def compare_text(text1, text2):
    """
    Compare two lists of text lines and return the number of matching lines.

    Args:
        text1 (list): List of text lines from the first source.
        text2 (list): List of text lines from the second source.

    Returns:
        int: Number of distinct lines present in both sources.
    """
    # Each argument is already a flat list of subtitle lines (as produced by
    # extract_srt_text). The previous implementation flattened one level too
    # deep — iterating the *characters* of each line — so it counted shared
    # characters rather than shared lines, which made almost any two
    # subtitle files appear to match.
    return len(set(text1) & set(text2))