You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

341 lines
11 KiB

#!/usr/bin/env python3
"""
StarCraft MPQ Asset Extractor
==============================
Extracts and organizes all assets from Starcraft.mpq into a structured
directory layout suitable for use with alternative game engines.
Usage:
python extract_starcraft_assets.py [--output DIR] [--mpq FILE]
Output Structure:
assets/
├── audio/ # All audio files (.wav)
├── graphics/ # Graphics and images (.pcx, .grp, .dds, .tga, .bmp)
├── video/ # Video files (.smk, .bik)
├── data/ # Game data files (.dat, .bin, .pal, .wpe, .cv5, .vf4, .vx4, .vr4)
├── maps/ # Map files (.chk, .scm, .scx)
├── fonts/ # Font files (.fnt)
├── text/ # Text and string files (.txt, .tbl)
├── scripts/ # Script files (.ais, .aiscript)
└── unknown/ # Unknown/unclassified files
"""
import sys
import argparse
from pathlib import Path
from collections import defaultdict
import time
try:
from pystorm import MPQArchive, StormLibError
except ImportError:
print("Error: PyStorm not installed. Please run: pip install -e .")
sys.exit(1)
# Category table: each key is a category name (it is also used as the
# sub-directory name under the output root) and each value lists the
# lower-case file extensions, including the leading dot, that belong to it.
# Lookups walk the table in insertion order and stop at the first match.
FILE_CATEGORIES = {
    "audio": [".wav", ".ogg", ".mp3"],
    "graphics": [".pcx", ".grp", ".dds", ".tga", ".bmp"],
    "video": [".smk", ".bik", ".avi"],
    "data": [
        ".dat", ".bin", ".pal", ".wpe",
        ".cv5", ".vf4", ".vx4", ".vr4",
    ],
    "maps": [".chk", ".scm", ".scx"],
    "fonts": [".fnt", ".ttf"],
    "text": [".txt", ".tbl", ".rtf"],
    "scripts": [".ais", ".aiscript", ".ai"],
    "models": [".m3", ".m2", ".mdx", ".mdl"],
    "shaders": [".fx", ".hlsl", ".glsl"],
    "config": [".ini", ".cfg", ".json", ".xml"],
}
def categorize_file(filename: str) -> str:
    """
    Categorize a file based on its extension.

    The extension is looked up in FILE_CATEGORIES. Names without an
    extension (or with the '.xxx' extension) fall back to a keyword search
    over the whole lower-cased name, directories included.

    Args:
        filename: The filename to categorize; may contain '\\' or '/'
            directory separators
    Returns:
        Category name (e.g., 'audio', 'graphics', 'unknown')
    """
    # Normalize separators before asking for the suffix: on POSIX a
    # backslash is an ordinary character, so Path("dir.v2\\file").suffix
    # would be ".v2\\file" instead of "" and the file would be misclassified.
    ext = Path(filename.replace('\\', '/')).suffix.lower()

    for category, extensions in FILE_CATEGORIES.items():
        if ext in extensions:
            return category

    # Special handling for files without a usable extension.
    # NOTE(review): '.xxx' is presumably the placeholder extension given to
    # archive entries whose real name is unknown -- confirm against the
    # archive library.
    if not ext or ext == '.xxx':
        name_lower = filename.lower()
        # Checked in order; the first keyword hit decides the category.
        name_hints = (
            (('sound', 'music'), 'audio'),
            (('video', 'movie'), 'video'),
            (('image', 'sprite'), 'graphics'),
            (('map',), 'maps'),
            (('script',), 'scripts'),
        )
        for keywords, category in name_hints:
            if any(keyword in name_lower for keyword in keywords):
                return category

    return 'unknown'
def get_file_info(file_data: dict) -> str:
    """
    Build a one-line, human-readable size summary for an archive entry.

    Args:
        file_data: Mapping that provides the 'size' and 'compressed_size'
            keys (both read as byte counts)
    Returns:
        "<size> -> <compressed> (<saved>% compressed)" when the size is
        positive, otherwise "<compressed> (packed)"
    """
    unpacked = file_data['size']
    packed = file_data['compressed_size']

    # Without a positive uncompressed size no ratio can be computed.
    if unpacked <= 0:
        return f"{format_size(packed):>10} (packed)"

    # Share of the original size that compression saved, in percent.
    saved_pct = ((unpacked - packed) / unpacked) * 100
    return f"{format_size(unpacked):>10} -> {format_size(packed):>10} ({saved_pct:>5.1f}% compressed)"
def format_size(size_bytes: int) -> str:
    """
    Render a byte count as a short string in B, KB or MB.

    Values below 1024 are shown verbatim in bytes; larger values are
    divided by 1024 (KB) or 1024 * 1024 (MB) and shown with one decimal.
    """
    kilo = 1024
    mega = kilo * kilo

    # Test the largest unit first so each branch is a simple lower bound.
    if size_bytes >= mega:
        return f"{size_bytes / 1024 / 1024:.1f} MB"
    if size_bytes >= kilo:
        return f"{size_bytes / 1024:.1f} KB"
    return f"{size_bytes} B"
def _safe_output_path(category_dir: Path, archive_name: str) -> Path:
    """Map an archive entry name to a path that stays inside category_dir."""
    # Normalize separators, then drop empty, '.' and '..' components so an
    # absolute name or a parent-directory reference cannot leave the target.
    parts = [
        part
        for part in archive_name.replace('\\', '/').split('/')
        if part not in ('', '.', '..')
    ]
    if not parts:
        raise ValueError(f"unusable archive entry name: {archive_name!r}")

    candidate = category_dir.joinpath(*parts)
    try:
        # joinpath() re-anchors on a drive or root component (e.g. "C:" on
        # Windows); relative_to() raises when that has happened.
        candidate.relative_to(category_dir)
    except ValueError:
        raise ValueError(
            f"archive entry escapes output directory: {archive_name!r}"
        ) from None
    return candidate


def extract_and_organize(mpq_path: str, output_dir: str, verbose: bool = True) -> bool:
    """
    Extract and organize all files from an MPQ archive.

    Every entry is categorized with categorize_file() and written below
    ``output_dir/<category>/``, keeping the entry's own sub-directories.
    Entry names are sanitized so nothing is written outside the category
    directory. A failure on a single file is counted and skipped; it does
    not abort the run.

    Args:
        mpq_path: Path to the MPQ file
        output_dir: Output directory for extracted assets
        verbose: Print progress every 50 files and per-file failures
    Returns:
        False if the archive cannot be opened, contains no files, or an
        unexpected error interrupts the run; True otherwise (individual
        file failures are only reported in the printed statistics).
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Statistics
    stats = {
        'total_files': 0,
        'extracted': 0,
        'failed': 0,
        'by_category': defaultdict(int),
        'total_size': 0,
        'total_compressed': 0,
    }

    print("=" * 80)
    print("StarCraft MPQ Asset Extractor")
    print("=" * 80)
    print(f"\nInput: {mpq_path}")
    print(f"Output: {output_path.absolute()}\n")

    # Open the archive
    try:
        print("Opening MPQ archive...")
        archive = MPQArchive(mpq_path)
        print("✓ Archive opened successfully\n")
    except StormLibError as e:
        print(f"✗ Error opening archive: {e}")
        return False

    try:
        # List all files
        print("Scanning archive contents...")
        files = archive.find_files("*")
        stats['total_files'] = len(files)
        print(f"✓ Found {len(files)} files\n")
        if len(files) == 0:
            print("⚠ No files found in archive")
            return False

        # Organize files by category
        files_by_category = defaultdict(list)
        for file_info in files:
            category = categorize_file(file_info['name'])
            files_by_category[category].append(file_info)
            stats['by_category'][category] += 1
            stats['total_size'] += file_info['size']
            stats['total_compressed'] += file_info['compressed_size']

        # Print category summary
        print("File Categories:")
        print("-" * 80)
        for category in sorted(files_by_category):
            count = len(files_by_category[category])
            print(f" {category:.<20} {count:>4} files")
        print("-" * 80 + "\n")

        # Extract files category by category
        start_time = time.time()
        for category in sorted(files_by_category):
            category_files = files_by_category[category]
            category_total = len(category_files)
            category_failed = 0
            category_dir = output_path / category
            category_dir.mkdir(parents=True, exist_ok=True)
            print(f"Extracting {category}/ ({category_total} files)...")

            for i, file_info in enumerate(category_files, 1):
                filename = file_info['name']
                try:
                    # Path building and directory creation live inside the
                    # try so one bad entry name only fails that entry.
                    output_file = _safe_output_path(category_dir, filename)
                    output_file.parent.mkdir(parents=True, exist_ok=True)
                    archive.extract_file(filename, str(output_file))
                    stats['extracted'] += 1
                    if verbose and i % 50 == 0:
                        progress = (i / category_total) * 100
                        print(f" Progress: {progress:>5.1f}% ({i}/{category_total})")
                except Exception as e:
                    # Best effort: record the failure and keep going.
                    stats['failed'] += 1
                    category_failed += 1
                    if verbose:
                        print(f" ✗ Failed: {filename} - {e}")

            if category_failed:
                extracted_here = category_total - category_failed
                print(
                    f" ⚠ Completed {category}/ - {extracted_here}/{category_total} files"
                    f" ({category_failed} failed)\n"
                )
            else:
                print(f" ✓ Completed {category}/ - {category_total} files\n")
        elapsed = time.time() - start_time
    except Exception as e:
        print(f"\n✗ Error during extraction: {e}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        # Runs on every exit path above, including the early returns.
        archive.close()

    # Print final statistics
    print("=" * 80)
    print("Extraction Complete!")
    print("=" * 80)
    print("\nStatistics:")
    print(f" Total files: {stats['total_files']:>6}")
    print(f" Extracted: {stats['extracted']:>6}")
    print(f" Failed: {stats['failed']:>6}")
    print(f" Time elapsed: {elapsed:>6.1f}s")
    print("\nStorage:")
    print(f" Uncompressed size: {format_size(stats['total_size'])}")
    print(f" Compressed size: {format_size(stats['total_compressed'])}")
    if stats['total_size'] > 0:
        ratio = ((stats['total_size'] - stats['total_compressed']) / stats['total_size']) * 100
        print(f" Compression ratio: {ratio:.1f}%")
    print("\nFiles by category:")
    for category in sorted(stats['by_category']):
        count = stats['by_category'][category]
        # total_files is non-zero here: an empty archive returned earlier.
        percentage = (count / stats['total_files']) * 100
        print(f" {category:.<20} {count:>4} files ({percentage:>5.1f}%)")

    if stats['failed']:
        # Do not claim full success when some entries could not be written.
        print(
            f"\n⚠ {stats['failed']} file(s) could not be extracted;"
            f" remaining assets extracted to: {output_path.absolute()}"
        )
    else:
        print(f"\n✓ All assets extracted to: {output_path.absolute()}")
    print("\nNext steps:")
    print(" 1. Review the extracted files in the assets/ directory")
    print(" 2. Read STARCRAFT_ASSETS.md for file format documentation")
    print(" 3. Integrate assets into your game engine")
    return True
def main() -> int:
    """
    Main entry point.

    Parses the command line, checks that the MPQ file exists, then either
    lists the archive contents grouped by category (--list-only) or runs
    the full extraction.

    Returns:
        Process exit code: 0 on success, 1 on any error.
    """
    parser = argparse.ArgumentParser(
        description="Extract and organize StarCraft MPQ assets",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument(
        '--mpq',
        default='Starcraft.mpq',
        help='Path to the MPQ file (default: Starcraft.mpq)'
    )
    parser.add_argument(
        '--output',
        '-o',
        default='assets',
        help='Output directory for extracted assets (default: assets/)'
    )
    parser.add_argument(
        '--quiet',
        '-q',
        action='store_true',
        help='Suppress verbose output'
    )
    parser.add_argument(
        '--list-only',
        '-l',
        action='store_true',
        help='Only list files without extracting'
    )
    args = parser.parse_args()

    # Check if MPQ file exists
    if not Path(args.mpq).exists():
        print(f"Error: MPQ file not found: {args.mpq}")
        print("\nPlease provide the path to your StarCraft MPQ file:")
        print(f" python {sys.argv[0]} --mpq /path/to/Starcraft.mpq")
        return 1

    # List only mode
    if args.list_only:
        try:
            archive = MPQArchive(args.mpq)
            try:
                files = archive.find_files("*")
                files_by_category = defaultdict(list)
                for file_info in files:
                    category = categorize_file(file_info['name'])
                    files_by_category[category].append(file_info['name'])

                print(f"\nFiles in {args.mpq}:")
                print("=" * 80)
                for category in sorted(files_by_category):
                    print(f"\n{category.upper()}:")
                    print("-" * 80)
                    for filename in sorted(files_by_category[category]):
                        print(f" {filename}")
            finally:
                # Close the archive even when listing fails part-way.
                archive.close()
            return 0
        except Exception as e:
            # Top-level boundary: report the error and exit non-zero.
            print(f"Error: {e}")
            return 1

    # Extract files
    success = extract_and_organize(
        args.mpq,
        args.output,
        verbose=not args.quiet
    )
    return 0 if success else 1
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())