feat(git-extract): make git extract work with whitelisting files

This commit is contained in:
Mohammad Rafiq 2025-03-06 20:11:14 +08:00
parent 17aa39b73c
commit f201190ef1

View file

@ -166,21 +166,30 @@ def get_latest_text_files_to_stdout(remote_repo_url=None, ignored_files=None):
working directory (if no URL is provided) to a temporary folder, working directory (if no URL is provided) to a temporary folder,
and then prints the contents of all files identified as text files to stdout, and then prints the contents of all files identified as text files to stdout,
prepended by their relative paths from the repository root, excluding specified prepended by their relative paths from the repository root, excluding specified
ignored files. ignored files. Entries prefixed with "!" switch the filter to include-only mode.
Args: Args:
remote_repo_url: The URL of the remote Git repository (optional). If None, remote_repo_url: The URL of the remote Git repository (optional). If None,
the current working directory is assumed to be a Git repo. the current working directory is assumed to be a Git repo.
ignored_files: A list of files or directories to ignore (relative to the repo root). ignored_files: A list of files or directories to ignore (relative to the repo root).
If any entry starts with "!", only files whose relative path starts with one of those entries (with the "!" removed) are kept; plain ignore entries are then not applied.
""" """
temp_dir = None temp_dir = None
if ignored_files is None: if ignored_files is None:
ignored_files = [] ignored_files = []
# Ensure .git and .gitignore are always ignored # Ensure .git and .gitignore are always ignored (unless include only is specified)
ignored_files.extend([".git", ".gitignore"]) include_only = any(item.startswith("!") for item in ignored_files)
ignored_files = list(set(ignored_files)) # remove duplicates if not include_only:
ignored_files.extend([".git", ".gitignore"])
ignored_files = list(set(ignored_files)) # remove duplicates
# Determine if "include only" is active and extract the include paths
include_only = any(item.startswith("!") for item in ignored_files)
include_paths = [item[1:] for item in ignored_files if item.startswith("!")]
ignore_paths = [item for item in ignored_files if not item.startswith("!")]
try: try:
# Create a temporary directory # Create a temporary directory
@ -215,25 +224,36 @@ def get_latest_text_files_to_stdout(remote_repo_url=None, ignored_files=None):
file_path = os.path.join(root, file) file_path = os.path.join(root, file)
relative_path = os.path.relpath(file_path, temp_dir) relative_path = os.path.relpath(file_path, temp_dir)
# Check if the file or any of its parent directories are ignored if include_only:
ignore = False # Include only logic
path_components = relative_path.split( include = False
os.sep for include_path in include_paths:
) # split based on OS-specific path separator if relative_path.startswith(include_path):
current_path = "" include = True
for component in path_components: break
current_path = ( if not include:
os.path.join(current_path, component) continue # Skip if not in include paths
if current_path else:
else component # Ignore logic (standard ignore)
) # prevent empty first join ignore = False
if current_path in ignored_files: path_components = relative_path.split(
ignore = True os.sep
break ) # split based on OS-specific path separator
current_path = ""
for component in path_components:
current_path = (
os.path.join(current_path, component)
if current_path
else component
) # prevent empty first join
if current_path in ignore_paths:
ignore = True
break
if ignore:
continue
if not ignore: if is_text_file(file_path): # Use the is_text_file function
if is_text_file(file_path): # Use the is_text_file function text_files.append(file_path)
text_files.append(file_path)
# Print the contents of each text file, prepended by its relative path # Print the contents of each text file, prepended by its relative path
for file_path in text_files: for file_path in text_files:
@ -275,7 +295,7 @@ if __name__ == "__main__":
"--ignored-files", "--ignored-files",
nargs="+", nargs="+",
default=[], default=[],
help="Files or directories to ignore (space-separated).", help="Files or directories to ignore (space-separated). Use !<path> to specify include only.",
) )
args = parser.parse_args() args = parser.parse_args()