Cleanup APTLY repo

After joining my new team, I noticed a huge number of packages in the development APT repo. We use APTLY to manage our repos, and unfortunately it doesn't provide a way to expire old packages or to keep only a given number of releases per package. So I wrote this Python script to clean up the repo.

By default, it keeps the 20 most recent versions of each package, but this can be overridden with --keep. It accepts an APTLY package query, so it can be run against a single package or a set of packages.

Simple package query example

# clean-repo.py --repo buster-dev --package-query vault-server --keep 2 --dry-run
Run in dry mode, without actually deleting the packages.
Remove "vault-server" from buster-dev and keep the last 2 packages.

This package(s) would be kept:
vault-server_1:0.11.4~20190424~buster.build0_amd64
vault-server_1:0.11.4~20190425~buster.build0_amd64
# clean-repo.py --repo buster-dev --package-query vault-server --keep 1 --dry-run
Run in dry mode, without actually deleting the packages.
Remove "vault-server" from buster-dev and keep the last 1 packages.

This package(s) would be kept:
vault-server_1:0.11.4~20190425~buster.build0_amd64

This package(s) would be deleted:
vault-server_1:0.11.4~20190424~buster.build0_amd64

Multiple package query example

# clean-repo.py --repo buster-dev --package-query 'Name (% vault-*)' --keep 1 --dry-run
Run in dry mode, without actually deleting the packages.
Remove "Name (% vault-*)" from buster-dev and keep the last 1 packages.

This package(s) would be kept:
vault-common_1:0.11.4~20190425~buster.build0_amd64
vault-server_1:0.11.4~20190425~buster.build0_amd64

This package(s) would be deleted:
vault-common_1:0.11.4~20190424~buster.build0_amd64
vault-server_1:0.11.4~20190424~buster.build0_amd64

Full script

#!/usr/bin/env python
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# <http://www.gnu.org/licenses/>.
#
# F-Gaudet 2019
#
# Inspired from this post : https://github.com/aptly-dev/aptly/issues/291
from __future__ import print_function

import argparse
import re
import sys
from functools import cmp_to_key
from subprocess import check_output, CalledProcessError

from apt_pkg import version_compare, init_system
'''
Examples :
Single package :
clean-repo --repo buster-dev --package-query cozy-stack --dry-run
All packages :
clean-repo --repo buster-dev --package-query 'Name' --dry-run
All vault* packages :
clean-repo --repo buster-dev --package-query 'Name (% vault-*)' --dry-run
'''
class PurgeOldVersions:
    '''Remove old package versions from an aptly repository.

    The repository, the aptly package query and the number of versions to
    keep per package are read from the command line. Packages matching the
    query are grouped by name, sorted by Debian version, and everything
    except the newest ``--keep`` versions of each package is removed (or
    only listed when ``--dry-run`` is given).
    '''

    def __init__(self):
        '''Parse and validate the command line, then announce the plan.

        Exits with an error message when the repository or the package
        query is missing, or when ``--keep`` is not strictly positive.
        '''
        self.args = self.parse_arguments()
        if self.args.dry_run:
            print("Run in dry mode, without actually deleting the packages.")
        if not self.args.repo:
            sys.exit("Repo name missing.")
        if not self.args.package_query:
            sys.exit("Package name missing.")
        if self.args.keep <= 0:
            sys.exit("Please keep at least one version.")
        print("Remove \"" + self.args.package_query + "\" from " +
              self.args.repo + " and keep the last " + str(self.args.keep) +
              " packages.")

    @staticmethod
    def parse_arguments():
        '''Build the command line parser and parse ``sys.argv``.

        Returns:
            argparse.Namespace with ``dry_run``, ``repo``, ``package_query``
            and ``keep`` attributes.
        '''
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter)
        parser.add_argument("--dry-run", dest="dry_run",
                            help="List packages to remove without removing "
                                 "them.", action="store_true")
        parser.add_argument("--repo", dest="repo",
                            help="Which repository should be searched ?",
                            type=str)
        parser.add_argument("--package-query", dest="package_query",
                            help="Which package should be removed ?\n"
                                 "e.g.\n"
                                 " - Single package: cozy-stack.\n"
                                 " - Query: 'Name (%% vault-*)' "
                                 "to match all vault packages.\n"
                                 " - Query: 'Name' "
                                 "to match all packages. See \n"
                                 "https://www.aptly.info/doc/feature/query/",
                            type=str)
        parser.add_argument("-k", "--keep", dest="keep",
                            help="How many package versions should be kept?",
                            type=int, default=20)
        return parser.parse_args()

    @staticmethod
    def _run_aptly(arguments):
        '''Run ``aptly`` with the given argument list and return its output.

        The output is decoded to text (``universal_newlines=True``) so the
        callers can split it into lines on both Python 2 and Python 3.
        On a non-zero aptly exit status the error is printed on stderr and
        the script exits with status 1 so schedulers can detect the failure.
        '''
        try:
            return check_output(["aptly"] + list(arguments),
                                universal_newlines=True)
        except CalledProcessError as e:
            print(e, file=sys.stderr)
            sys.exit(1)

    @staticmethod
    def _package_names(search_output):
        '''Return the set of package names found in ``aptly repo search``
        output, where each non-empty line is ``name_version_arch``.'''
        return set(line.split('_')[0]
                   for line in search_output.splitlines() if line)

    @staticmethod
    def _references_for(search_output, package_name):
        '''Return the output lines that reference exactly ``package_name``.

        The trailing underscore is part of the match so that searching for
        ``vault`` does not also pick up ``vault-server_...`` lines.
        '''
        prefix = package_name + "_"
        return [line for line in search_output.splitlines()
                if line.startswith(prefix)]

    @staticmethod
    def _split_keep_delete(sorted_references, keep):
        '''Split references sorted oldest-first into ``(kept, deleted)``:
        the last ``keep`` entries are kept, everything before is deleted.'''
        return sorted_references[-keep:], sorted_references[:-keep]

    @staticmethod
    def _remove_arguments(repo, package_references):
        '''Return the aptly arguments removing ``package_references`` from
        ``repo``, one argument per package reference.'''
        return ["repo", "remove", repo] + list(package_references)

    def get_packages(self):
        '''Get the list of packages for a given repository. Use the package
        query and the repository name given on the command line.

        Returns:
            Set of unique package names
        '''
        output = self._run_aptly(["repo", "search", self.args.repo,
                                  self.args.package_query])
        return self._package_names(output)

    def get_package(self, package_name):
        '''Get the list of package occurrences. Use the repository name
        given on the command line.

        Args:
            package_name (str) : The package name

        Returns:
            List of package names with version (``name_version_arch``)
        '''
        output = self._run_aptly(["repo", "search", self.args.repo,
                                  package_name])
        return self._references_for(output, package_name)

    def purge_packages(self):
        '''Purge the packages depending on the repository and the
        package query given on the command line.

        In dry-run mode only the kept and deleted package lists are
        printed. Otherwise the old packages are removed from the repository
        and the publication is updated.
        '''
        init_system()

        def compare_versions(reference_1, reference_2):
            # References look like name_version_arch; compare the version
            # fields with the Debian version ordering from apt_pkg.
            return version_compare(reference_1.split("_")[1],
                                   reference_2.split("_")[1])

        should_keep = []
        should_delete = []
        # Sorted iteration keeps the report order stable between runs.
        for package_name in sorted(self.get_packages()):
            references = self.get_package(package_name)
            references.sort(key=cmp_to_key(compare_versions))
            kept, deleted = self._split_keep_delete(references,
                                                    self.args.keep)
            should_keep += kept
            should_delete += deleted

        if self.args.dry_run:
            if should_keep:
                print("\nThis package(s) would be kept:")
                for p in should_keep:
                    print(p)
            if should_delete:
                print("\nThis package(s) would be deleted:")
                for p in should_delete:
                    print(p)
            return

        if not should_delete:
            print("Nothing to remove")
            return

        # Each package reference must be its own argument: a single
        # space-joined string would reach aptly as one malformed query.
        print(self._run_aptly(
            self._remove_arguments(self.args.repo, should_delete)))
        # NOTE(review): assumes the repo is named "<distribution>-<suffix>"
        # (e.g. buster-dev) and is published under "<distribution>" --
        # confirm against the local publishing layout.
        print(self._run_aptly(["publish", "update",
                               self.args.repo.split('-')[0]]))
# Script entry point: build the purger from the command line and run it.
if __name__ == '__main__':
    PurgeOldVersions().purge_packages()
view raw clean-repo.py hosted with ❤ by GitHub

Use your favorite scheduler (rundeck, cron...) to run this script.