Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### General

- Avoid deleting files ignored by git during `pipelines sync` ([#3847](https://github.com/nf-core/tools/pull/3847))

### Template

### Linting
Expand Down
78 changes: 63 additions & 15 deletions nf_core/pipelines/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import os
import re
import shutil
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -140,7 +139,7 @@ def sync(self) -> None:
self.inspect_sync_dir()
self.get_wf_config()
self.checkout_template_branch()
self.delete_template_branch_files()
self.delete_tracked_template_branch_files()
self.make_template_pipeline()
self.commit_template_changes()

Expand Down Expand Up @@ -194,9 +193,14 @@ def inspect_sync_dir(self):
# Check to see if there are uncommitted changes on current branch
if self.repo.is_dirty(untracked_files=True):
raise SyncExceptionError(
"Uncommitted changes found in pipeline directory!\nPlease commit these before running nf-core pipelines sync"
"Uncommitted changes found in pipeline directory!\n"
"Please commit these before running nf-core pipelines sync.\n"
"(Hint: .gitignored files are ignored.)"
)

# Track ignored files to avoid processing them
self.ignored_files = self._get_ignored_files()

def get_wf_config(self):
"""Check out the target branch if requested and fetch the nextflow config.
Check that we have the required config variables.
Expand Down Expand Up @@ -240,25 +244,51 @@ def checkout_template_branch(self):
except GitCommandError:
raise SyncExceptionError("Could not check out branch 'origin/TEMPLATE' or 'TEMPLATE'")

def delete_template_branch_files(self):
def delete_tracked_template_branch_files(self):
"""
Delete all files in the TEMPLATE branch
Delete all tracked files and subsequent empty directories in the TEMPLATE branch
"""
# Delete everything
log.info("Deleting all files in 'TEMPLATE' branch")
for the_file in os.listdir(self.pipeline_dir):
if the_file == ".git":
continue
# Delete tracked files
log.info("Deleting tracked files in 'TEMPLATE' branch")
self._delete_tracked_files()
self._clean_up_empty_dirs()

def _delete_tracked_files(self):
"""
Delete all tracked files in the repository
"""
for the_file in self._get_tracked_files():
file_path = os.path.join(self.pipeline_dir, the_file)
log.debug(f"Deleting {file_path}")
try:
if os.path.isfile(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
os.unlink(file_path)
except Exception as e:
raise SyncExceptionError(e)

def _clean_up_empty_dirs(self):
"""
Delete empty directories in the repository

Walks the directory tree from the bottom up, deleting empty directories as it goes.
"""
# Track deleted child directories so we know they've been deleted when evaluating if the parent is empty
deleted = set()

for curr_dir, sub_dirs, files in os.walk(self.pipeline_dir, topdown=False):
# Don't delete the root directory (should never happen due to .git, but just in case)
if curr_dir == str(self.pipeline_dir):
continue

subdir_set = set(os.path.join(curr_dir, d) for d in sub_dirs)
currdir_is_empty = (len(subdir_set - deleted) == 0) and (len(files) == 0)
if currdir_is_empty:
log.debug(f"Deleting empty directory {curr_dir}")
try:
os.rmdir(curr_dir)
except Exception as e:
raise SyncExceptionError(e)
deleted.add(curr_dir)

def make_template_pipeline(self):
"""
Delete all files and make a fresh template using the workflow variables
Expand Down Expand Up @@ -311,7 +341,9 @@ def commit_template_changes(self):
return False
# Commit changes
try:
self.repo.git.add(A=True)
# add and commit all files except self.ignored_files
# :! syntax to exclude files using git pathspec
self.repo.git.add([f":!{f}" for f in self.ignored_files], all=True)
self.repo.index.commit(f"Template update for nf-core/tools version {nf_core.__version__}")
self.made_changes = True
log.info("Committed changes to 'TEMPLATE' branch")
Expand Down Expand Up @@ -503,3 +535,19 @@ def reset_target_dir(self):
self.repo.git.checkout(self.original_branch)
except GitCommandError as e:
raise SyncExceptionError(f"Could not reset to original branch `{self.original_branch}`:\n{e}")

def _get_ignored_files(self) -> list[str]:
"""
Get a list of all files in the repo ignored by git.
"""
# -z separates with \0 and makes sure special characters are handled correctly
raw_ignored_files = self.repo.git.ls_files(z=True, ignored=True, others=True, exclude_standard=True)
return raw_ignored_files.split("\0")[:-1] if raw_ignored_files else []

def _get_tracked_files(self) -> list[str]:
"""
Get a list of all files in the repo tracked by git.
"""
# -z separates with \0 and makes sure special characters are handled correctly
raw_tracked_files = self.repo.git.ls_files(z=True)
return raw_tracked_files.split("\0")[:-1] if raw_tracked_files else []
180 changes: 173 additions & 7 deletions tests/pipelines/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def json(self):
return self.data


def mocked_requests_get(url) -> MockResponse:
"""Helper function to emulate POST requests responses from the web"""
def mocked_requests_get(url, params=None, **kwargs) -> MockResponse:
"""Helper function to emulate GET request responses from the web"""

url_template = "https://api.github.com/repos/{}/response/"
if url == Path(url_template.format("no_existing_pr"), "pulls?head=TEMPLATE&base=None"):
Expand Down Expand Up @@ -120,6 +120,17 @@ def test_inspect_sync_dir_dirty(self):
finally:
os.remove(test_fn)

def test_inspect_sync_ignored_files(self):
"""
Try inspecting the repo for syncing with untracked changes that are ignored.
No assertions, we are checking that no exception is raised in the process.
"""
test_fn = Path(self.pipeline_dir) / "ignored.txt"
self._make_ignored_file(test_fn)

psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)
psync.inspect_sync_dir()

def test_get_wf_config_no_branch(self):
"""Try getting a workflow config when the branch doesn't exist"""
# Try to sync, check we halt with the right error
Expand Down Expand Up @@ -161,23 +172,131 @@ def test_checkout_template_branch_no_template(self):
psync.checkout_template_branch()
assert exc_info.value.args[0] == "Could not check out branch 'origin/TEMPLATE' or 'TEMPLATE'"

def test_delete_template_branch_files(self):
"""Confirm that we can delete all files in the TEMPLATE branch"""
def test_delete_tracked_template_branch_files(self):
"""Confirm that we can delete all tracked files in the TEMPLATE branch"""
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)
psync.inspect_sync_dir()
psync.get_wf_config()
psync.checkout_template_branch()
psync.delete_template_branch_files()
psync.delete_tracked_template_branch_files()
assert os.listdir(self.pipeline_dir) == [".git"]

def test_delete_tracked_template_branch_files_unlink_throws_error(self):
"""Test that SyncExceptionError is raised when os.unlink throws an exception"""
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)
psync.inspect_sync_dir()
psync.get_wf_config()
psync.checkout_template_branch()

# Create a test file that would normally be deleted
test_file = Path(self.pipeline_dir) / "test_file.txt"
test_file.touch()

# Mock os.unlink to raise an exception
with mock.patch("os.unlink", side_effect=OSError("Permission denied")) as mock_unlink:
with pytest.raises(nf_core.pipelines.sync.SyncExceptionError) as exc_info:
psync.delete_tracked_template_branch_files()

# Verify the exception contains the original error
assert "Permission denied" in str(exc_info.value)

# Verify os.unlink was called
mock_unlink.assert_called()

def test_delete_tracked_template_branch_rmdir_throws_error(self):
"""Test that SyncExceptionError is raised when os.rmdir throws an exception"""
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)
psync.inspect_sync_dir()
psync.get_wf_config()
psync.checkout_template_branch()

# Create an empty directory that would normally be deleted
empty_dir = Path(self.pipeline_dir) / "empty_test_dir"
empty_dir.mkdir()

# Mock os.rmdir to raise an exception
with mock.patch("os.rmdir", side_effect=OSError("Permission denied")) as mock_rmdir:
with pytest.raises(nf_core.pipelines.sync.SyncExceptionError) as exc_info:
psync.delete_tracked_template_branch_files()

# Verify the exception contains the original error
assert "Permission denied" in str(exc_info.value)

# Verify os.rmdir was called
mock_rmdir.assert_called()

def test_delete_staged_template_branch_files_ignored(self):
"""Confirm that files in .gitignore are not deleted by delete_staged_template_branch_files"""
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)

ignored_file = Path(self.pipeline_dir) / "ignored.txt"
self._make_ignored_file(ignored_file)

psync.inspect_sync_dir()
psync.get_wf_config()
psync.checkout_template_branch()
psync.delete_tracked_template_branch_files()

# Ignored file should still exist
assert ignored_file.exists()

# .git directory should still exist
assert (Path(self.pipeline_dir) / ".git").exists()

def test_delete_staged_template_branch_files_ignored_nested_dir(self):
"""Confirm that deletion of ignored files respects directory structure"""
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)
repo = git.Repo(self.pipeline_dir)

# Create this structure:
# dir
# ├── subdirA # (should be kept)
# │   └── subdirB # (should be kept)
# │   └── ignored.txt # add to .gitignore (should be kept)
# └── subdirC # (should be deleted)
# └── subdirD # (should be deleted)
# └── not_ignored.txt # commit this file (should be deleted)
parent_dir = Path(self.pipeline_dir) / "dir"
to_be_kept_dir = parent_dir / "subdirA" / "subdirB"
ignored_file = to_be_kept_dir / "ignored.txt"
to_be_deleted_dir = parent_dir / "subdirC" / "subdirD"
non_ignored_file = to_be_deleted_dir / "not_ignored.txt"

to_be_kept_dir.mkdir(parents=True)
to_be_deleted_dir.mkdir(parents=True)
non_ignored_file.touch()

repo.git.add(non_ignored_file)
repo.index.commit("Add non-ignored file")

self._make_ignored_file(ignored_file)

psync.inspect_sync_dir()
psync.get_wf_config()
psync.checkout_template_branch()
psync.delete_tracked_template_branch_files()

# Ignored file and its parent directory should still exist
assert ignored_file.exists()
assert to_be_kept_dir.exists() # subdirB
assert to_be_kept_dir.parent.exists() # subdirA

# Non-ignored file and its parent directory should be deleted
assert not non_ignored_file.exists()
assert not to_be_deleted_dir.exists() # subdirD
assert not to_be_deleted_dir.parent.exists() # subdirC

# .git directory should still exist
assert (Path(self.pipeline_dir) / ".git").exists()

def test_create_template_pipeline(self):
"""Confirm that we can delete all files in the TEMPLATE branch"""
"""Confirm that we can create a new template pipeline in an empty directory"""
# First, delete all the files
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)
psync.inspect_sync_dir()
psync.get_wf_config()
psync.checkout_template_branch()
psync.delete_template_branch_files()
psync.delete_tracked_template_branch_files()
assert os.listdir(self.pipeline_dir) == [".git"]
# Now create the new template
psync.make_template_pipeline()
Expand Down Expand Up @@ -211,6 +330,22 @@ def test_commit_template_changes_changes(self):
# Check that we don't have any uncommitted changes
assert psync.repo.is_dirty(untracked_files=True) is False

def test_commit_template_preserves_ignored(self):
"""Try to commit the TEMPLATE branch, but no changes were made"""
# Check out the TEMPLATE branch but skip making the new template etc.
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)

ignored_file = Path(self.pipeline_dir) / "ignored.txt"

self._make_ignored_file(ignored_file)

psync.inspect_sync_dir()
psync.get_wf_config()
psync.checkout_template_branch()
psync.commit_template_changes()

assert ignored_file.exists()

def test_push_template_branch_error(self):
"""Try pushing the changes, but without a remote (should fail)"""
# Check out the TEMPLATE branch but skip making the new template etc.
Expand Down Expand Up @@ -430,3 +565,34 @@ def test_sync_no_github_token(self):
with self.assertRaises(nf_core.pipelines.sync.PullRequestExceptionError) as exc_info:
psync.sync()
self.assertIn("GITHUB_AUTH_TOKEN not set!", str(exc_info.exception))

def test_sync_preserves_ignored_files(self):
"""Test that sync preserves files and directories specified in .gitignore"""
with (
mock.patch("requests.get", side_effect=mocked_requests_get),
):
psync = nf_core.pipelines.sync.PipelineSync(self.pipeline_dir)

ignored_file = Path(self.pipeline_dir) / "ignored.txt"
self._make_ignored_file(ignored_file)

psync.made_changes = True

psync.sync()

self.assertTrue(ignored_file.exists())

def _make_ignored_file(self, file_path: Path):
"""Helper function to create an ignored file."""
if not self.pipeline_dir:
raise ValueError("Instantiate a pipeline before adding ignored files.")

file_path.touch()

gitignore_path = Path(self.pipeline_dir) / ".gitignore"
with open(gitignore_path, "a") as f:
f.write(f"{file_path.name}\n")

repo = git.Repo(self.pipeline_dir)
repo.git.add(".gitignore")
repo.index.commit("Add .gitignore")