Incremental Rsync backup script to take daily, weekly and monthly backups automatically

The script makes daily backups of the target folders with rsync, preserving user rights and the folder structure. It keeps three consecutive daily backups; after three days only weekly backups are kept and the other daily backups are removed. After a few weeks only the first weekly backup of each month is kept. This saves a lot of space: recent changes are stored daily, while older changes are stored only weekly and then monthly. Each backup is stored in its own folder on the backup drive. Each of these folders contains one subfolder per target folder, holding its backed-up contents. The backup structure can be mounted as a read-only drive, which allows users to retrieve their missing content without any administrative operations.

Rsync uses hard links to store unchanged files, which saves a huge amount of backup space because only modified files are written to the backup drive. All unmodified files are hard-linked between the backup instances.

The script is designed to be run daily using cron.

Download the script here


#!/usr/bin/python3
# -*- coding: utf-8 -*-

# Copyright (C) 2014-2026, Heikki Hyyti
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# Revision 1, 2014: Initial working version for python2
# Revision 2, 2025: Update to work with python3
# Revision 3, 2026: Update to detect if previous backing up has not succeeded. Instead of moving the backup out of way, just run rsync again

#print("Backup script is doing nothing, just exiting")
#exit(1)

# --- configurations --- #

# External programs used for the operations. Each value is placed at the
# start of a command line that is executed through the shell, so it may
# contain arguments as well as the program path.
rsync_prog = '/usr/bin/rsync'
move_prog = '/bin/mv'
remove_prog = '/bin/rm'
mkdir_prog = '/bin/mkdir'
# The timestamp command ends with the shell redirection '>'; the path of the
# timestamp file is appended after it when the command is built.
timestamp_prog = '/bin/date \'+%Y-%m-%d %H:%M\' >'

# File listing the exclude rules (configure this to your exclude file).
# It is given to rsync with --exclude-from together with --delete-excluded;
# when the file does not exist, both options are left out.
exclude_file = '/etc/rsync_exclude'

# Mountpoint of the backup disk (configure this to your backup device
# mountpoint). The script exits with an error unless this path is listed as
# a mount point in the output of 'df'. A trailing '/' is stripped.
backup_mountpoint = '/backup'

# Name base is the path of the backup structure relative to the mountpoint:
# the current backup is written to mountpoint/name_base and older backups are
# kept as mountpoint/name_base.01, mountpoint/name_base.02, ...
# (Configure this to the base name on your backup device.)
backup_name_base = 'snapshots/backup'

# Number of monthly, weekly and daily backups to keep.
# Negative monthly/weekly values are treated as 0 and a daily value below 1
# is treated as 1.
num_of_monthly_backups = 12
num_of_weekly_backups = 4
num_of_daily_backups = 4

# Locations to back up (configure these with full paths). Each entry is
# shell-quoted and handed to rsync as a source directory.
target_directories = ['/etc','/var', '/home', '/data']

# Name of the timestamp file written into the current backup directory after
# the backup run. The old backups are rotated at the start of a run only if
# this file is found in the current backup directory.
timestamp_filename = 'timestamp.txt'

# Target weekday to take weekly backups (Monday is 1 and Sunday is 7)
# Monthly backups are always taken on the target weekday that falls within
# the first seven days of the month.
target_weekday = 6

# Debug level:
#   0         print only errors
#   1         additionally print every command line before it is run
#   2         additionally print the name of every operation
#   99, 100   rsync and rm are NOT executed, but mkdir, mv and the timestamp
#             command still are (use these values only in a test directory)
#   101+      nothing is executed, the commands are only printed
DEBUG = 2


# --- Code starts, function declarations --- #

import datetime
import subprocess
from os import path
from sys import exit

# The 'pipes' module was removed in Python 3.13, so prefer shlex.quote and
# fall back to the old import only if shlex does not provide it.
try:
    from shlex import quote
except ImportError:
    from pipes import quote

# Function to write timestamp to the folder
def write_timestamp():
    """Write the timestamp file into the current backup directory.

    The command is built from ``timestamp_prog`` followed by the path
    ``<backup_mountpoint>/<backup_name_base>/<timestamp_filename>`` and is run
    through the shell. It is only executed when ``DEBUG <= 100``.

    Nothing is returned. If the command exits with a non-zero status, the
    ``(exit_status, output)`` tuple is printed.
    """
    if (DEBUG > 1): print('Write timestamp')

    # Set backup targets path
    backup_target = '%s/%s/' % (backup_mountpoint, backup_name_base)

    # Timestamp path and filename
    timestamp_file = '%s%s' % (backup_target, timestamp_filename)

    # The command line is interpreted by the shell, so the path is quoted to
    # keep spaces or shell metacharacters in it from being split or expanded.
    # Paths without such characters are left unchanged by quote().
    timestamp_command = '%s %s' % (timestamp_prog, quote(timestamp_file))
    if (DEBUG > 0):
        print(timestamp_command)

    if (DEBUG <= 100):  # a very large DEBUG value disables execution for testing
        timestamp_exec = subprocess.getstatusoutput(timestamp_command)
        if (timestamp_exec[0] != 0):
            print(timestamp_exec)

# Function to test if the timestamp exists (this is a prerequisite for rotating old backups out of the way instead of writing on top of a previous unsuccessful backup)
def is_timestamp():
    """Report whether the current backup directory contains a timestamp file.

    The file looked for is
    ``<backup_mountpoint>/<backup_name_base>/<timestamp_filename>``.
    When ``DEBUG > 0`` the outcome is also printed.

    Returns:
        ``True`` if the timestamp path is an existing regular file,
        ``False`` otherwise.
    """
    # Directory of the current backup (with trailing '/') plus the file name
    current_backup_dir = '%s/%s/' % (backup_mountpoint, backup_name_base)
    timestamp_file = '%s%s' % (current_backup_dir, timestamp_filename)

    found = path.isfile(timestamp_file)

    if (DEBUG > 0):
        message = 'Timestamp file \'%s\' was found'
        if (not found):
            message = 'Timestamp file \'%s\' was NOT found !!!'
        print(message % (timestamp_file))

    return found


# Function to backup given directory to backup structure
def backup_directory(backup_source):
    """Copy one source directory into the current backup directory with rsync.

    The target is ``<backup_mountpoint>/<backup_name_base>/``; it is created
    with ``mkdir_prog`` if it does not exist (only when ``DEBUG <= 100``).
    If the previous backup ``<backup_name_base>.01`` exists, it is passed to
    rsync as ``--link-dest``. If ``exclude_file`` exists, it is passed as
    ``--exclude-from`` together with ``--delete-excluded``.

    Args:
        backup_source: Source directory. The value is inserted into the shell
            command line as given, so the caller must shell-quote it when
            needed. One trailing '/' is removed.

    Returns:
        The ``(exit_status, output)`` tuple of the rsync run, or ``True``
        when ``DEBUG >= 99`` and rsync is therefore not executed. Note that
        the tuple is truthy even for a failed run; check ``result[0]``.
    """
    if (DEBUG > 1): print('Backup \'%s\'' % backup_source)

    # Remove last '/' from source directory if given
    if (backup_source.endswith('/')):
        backup_source = backup_source[:-1]

    # Set backup target and the previous backup used as hard-link reference
    backup_target = '%s/%s/' % (backup_mountpoint, backup_name_base)
    backup_links = '%s/%s.01/' % (backup_mountpoint, backup_name_base)

    # All paths that end up on the shell command line are quoted, so that
    # spaces or shell metacharacters in the configured paths cannot split or
    # alter the command. quote() leaves ordinary paths unchanged.
    links_string = ' '
    if (path.isdir(backup_links)):
        links_string = ' --link-dest=%s ' % quote(backup_links)

    exclude_string = ''
    if (path.exists(exclude_file)):
        exclude_string = ' --exclude-from=%s --delete-excluded' % quote(exclude_file)

    # Create new folder for the new backup if it doesn't exist
    if (not path.isdir(backup_target)):
        mkdir_command = '%s %s' % (mkdir_prog, quote(backup_target))
        if (DEBUG > 0):
            print(mkdir_command)

        if (DEBUG <= 100):  # a very large DEBUG value disables execution for testing
            mkdir_exec = subprocess.getstatusoutput(mkdir_command)
            if (mkdir_exec[0] != 0):
                print(mkdir_exec)

    # Build and execute the backup command
    backup_command = '%s -a --delete%s%s%s %s' % (
        rsync_prog, exclude_string, links_string, backup_source, quote(backup_target))
    if (DEBUG > 0):
        print(backup_command)

    if (DEBUG >= 99):  # rsync is skipped in the test/dry-run debug levels
        return True

    backup_exec = subprocess.getstatusoutput(backup_command)
    if (backup_exec[0] != 0):
        print(backup_exec)

    return backup_exec
    
    
# Move old backup from source to target
def move_old_backup_to(source, target):
    """Rename one backup directory inside the backup mountpoint.

    Both names are taken relative to ``backup_mountpoint``, so the operation
    is confined to paths below the mountpoint.

    Args:
        source: Existing directory to move, relative to the mountpoint.
        target: New name, relative to the mountpoint. It must not exist yet.

    Returns:
        ``False`` if the source directory does not exist, ``-1`` if the
        target directory already exists, ``False`` if ``DEBUG > 100``
        suppressed the execution, otherwise the ``(exit_status, output)``
        tuple of the move command.
    """
    if (DEBUG > 1): print('Move \'%s\' to \'%s\'' % (source, target))

    # Allow operations only in backup directory
    sd = '%s/%s' % (backup_mountpoint, source)
    td = '%s/%s' % (backup_mountpoint, target)

    if (not path.isdir(sd)):
        print('ERROR: source path \'%s\' does not exist' % sd)
        return False

    if (path.isdir(td)):
        print('ERROR: target path \'%s\' already exists' % td)
        return -1

    # The command is run through the shell: quote both paths so that spaces
    # or shell metacharacters cannot turn them into several arguments.
    move_command = '%s %s %s' % (move_prog, quote(sd), quote(td))
    if (DEBUG > 0):
        print(move_command)

    if (DEBUG > 100):  # a very large DEBUG value disables execution for testing
        return False

    move_exec = subprocess.getstatusoutput(move_command)
    if (move_exec[0] != 0):
        print(move_exec)

    return move_exec

        
# Remove target directory
def remove_directory(target):
    """Recursively delete a directory inside the backup mountpoint.

    Args:
        target: Directory to delete, relative to ``backup_mountpoint``.

    Returns:
        ``False`` if the directory does not exist, ``True`` if
        ``DEBUG >= 99`` suppressed the execution, otherwise the
        ``(exit_status, output)`` tuple of the remove command.
    """
    if (DEBUG > 1): print('Remove \'%s\'' % target)

    # Allow operations only in backup directory
    td = '%s/%s' % (backup_mountpoint, target)

    if (not path.isdir(td)):
        print('ERROR: target path \'%s\' does not exist' % td)
        return False

    # The path must be quoted: unquoted, a space in it would make the shell
    # pass several separate paths to the recursive remove.
    remove_command = '%s -r %s' % (remove_prog, quote(td))
    if (DEBUG > 0):
        print(remove_command)

    if (DEBUG >= 99):  # removal is skipped in the test/dry-run debug levels
        return True

    remove_exec = subprocess.getstatusoutput(remove_command)
    if (remove_exec[0] != 0):
        print(remove_exec)

    return remove_exec

        
# Remove backup with index
def remove_backup(idx):
    """Delete the numbered backup ``<backup_name_base>.<idx>`` if it exists.

    Args:
        idx: Backup index; it is formatted with at least two digits.

    Returns:
        ``False`` when no such backup directory exists, otherwise whatever
        ``remove_directory`` returns for it.
    """
    snapshot = '%s.%02d' % (backup_name_base, idx)
    snapshot_path = '%s/%s' % (backup_mountpoint, snapshot)

    if (not path.isdir(snapshot_path)):
        return False

    return remove_directory(snapshot)

    
# Remove all too old backups (if number of backups is decreased smaller, this removes all older backups)
def remove_all_old_backups():
    """Delete the backup with index ``N_max`` and every consecutive one after it.

    Indices are tried in ascending order starting from ``N_max``; the loop
    ends at the first index for which ``remove_backup`` returns a false value.
    """
    next_idx = N_max
    while True:
        if (not remove_backup(next_idx)):
            break
        next_idx += 1
    
        
# Algorithm to move and remove backups in daily, weekly and monthly fashion
# it takes current_date as an argument for testing purposes, usually it should be used using default argument
def store_old_backups(current_date=None):
    """Rotate the existing backups one step in daily/weekly/monthly fashion.

    Backups ``01..N_d`` are the daily, ``N_d+1..N_w`` the weekly and
    ``N_w+1..N_max`` the monthly slots. Every step renames backup ``i-1`` to
    ``i``, starting from the highest index so that the target slot is free.

    * On ``target_weekday`` within the first seven days of a month, the
      monthly and weekly slots are shifted.
    * On any other ``target_weekday``, backup ``N_w`` is removed and the
      weekly slots are shifted.
    * On all other days, backup ``N_d`` is removed.

    After that the daily slots are always shifted and the current backup
    directory ``<backup_name_base>`` becomes backup ``01``.

    Args:
        current_date: Date used for the weekday/day-of-month decision.
            ``None`` (the default) means the date at the time of the call.
            The parameter exists for testing purposes.
    """
    # Resolve the default at call time; a default of datetime.today() in the
    # signature would be evaluated only once, when the function is defined.
    if (current_date is None):
        current_date = datetime.datetime.today()

    if (current_date.isoweekday() == target_weekday):
        if (current_date.day <= 7):
            # Monthly backups (only at first week of a month)
            _shift_backups(N_w, N_max)
        else:
            # remove last weekly backup as it was not moved to monthly backups
            remove_backup(N_w)

        # Weekly backups
        _shift_backups(N_d, N_w)
    else:
        # remove last daily backup as it was not moved to weekly backups
        remove_backup(N_d)

    # Daily backups
    _shift_backups(1, N_d)

    # Move last backup to first daily archive
    source = '%s' % backup_name_base
    target = '%s.%02d' % (backup_name_base, 1)
    if (path.isdir('%s/%s' % (backup_mountpoint, source))):
        move_old_backup_to(source, target)


def _shift_backups(lowest_idx, highest_idx):
    """Rename backup ``i-1`` to ``i`` for ``i`` from highest_idx down to lowest_idx+1.

    Backups that do not exist are skipped.
    """
    for i in range(highest_idx, lowest_idx, -1):
        source = '%s.%02d' % (backup_name_base, i - 1)
        target = '%s.%02d' % (backup_name_base, i)
        if (path.isdir('%s/%s' % (backup_mountpoint, source))):
            move_old_backup_to(source, target)

            
# A function to check if backup disk is mounted
def is_backup_disk_mounted(mountpoint):
    """Check whether ``mountpoint`` is listed as a mount point by ``df``.

    ``df`` is run with ``-P`` (POSIX output format), which prints every file
    system on a single line with six columns; without it a long device name
    can be wrapped onto a separate line and the mount point column is then
    not found. The exit status of ``df`` is not checked.

    Args:
        mountpoint: Mount point path to look for, compared as an exact string
            (so it must not have a trailing '/').

    Returns:
        ``True`` if a line of the ``df`` output has ``mountpoint`` in its
        last column, ``False`` otherwise.
    """
    df = subprocess.Popen(["df", "-P"], stdout=subprocess.PIPE)
    output = df.communicate()[0]

    # Skip the header line. Undecodable bytes in a mount name must not abort
    # the check, they only make that one line not match.
    for line in output.decode(errors='replace').splitlines()[1:]:
        # Split at most five times: everything after the fifth column belongs
        # to the mount point, which may itself contain spaces.
        lineparts = line.split(None, 5)
        if (len(lineparts) == 6 and lineparts[5] == mountpoint):
            return True

    return False
            
    
# --- Backup script starts --- #

# Compute number of backup history.
# Monthly and weekly backups may be disabled (0), but at least one daily
# backup is always kept.
num_of_monthly_backups = max(num_of_monthly_backups, 0)
num_of_weekly_backups = max(num_of_weekly_backups, 0)
num_of_daily_backups = max(num_of_daily_backups, 1)

# Highest backup index of the monthly, weekly and daily slots
N_max = num_of_monthly_backups + num_of_weekly_backups + num_of_daily_backups
N_w = num_of_weekly_backups + num_of_daily_backups
N_d = num_of_daily_backups


# Check that backup mountpoint doesn't have '/' at the end (this restricts backing up to '/' and other mountpoints don't end to it)
if (backup_mountpoint[-1] == '/'):
    backup_mountpoint = backup_mountpoint[:-1]

# Check that base name doesn't have '/' at the end
if (backup_name_base[-1] == '/'):
    backup_name_base = backup_name_base[:-1]

# Check that backup directory structure and mountpoint exist.
# backup_root is the parent directory of the current backup directory.
backup_path = '%s/%s' % (backup_mountpoint, backup_name_base)
backup_root = backup_path[:(backup_path.rindex('/'))]

if (not is_backup_disk_mounted(backup_mountpoint)):
    print('ERROR: backup mountpoint \'%s\' does not exist, backup not completed!' % backup_mountpoint)
    exit(-1)

if (not path.isdir(backup_root)):
    print('ERROR: backup root path \'%s\' does not exist, backup not completed!' % backup_root)
    exit(-1)


# rsync exit statuses that count as a completed copy: 0 is success and 24
# means only that some source files vanished while rsync was running, which
# is normal when backing up a live system.
RSYNC_OK_STATUSES = (0, 24)


def _backup_succeeded(result):
    """Interpret the return value of backup_directory as success or failure.

    backup_directory returns either an (exit_status, output) tuple or True
    (when execution is disabled by DEBUG). A non-empty tuple is always
    truthy, so the exit status has to be inspected explicitly.
    """
    if (isinstance(result, tuple)):
        return result[0] in RSYNC_OK_STATUSES
    return bool(result)


## The algorithm starts ##

if (is_timestamp()):
    # if timestamp is found from the most recent backup, the backups are rotated out of way
    remove_all_old_backups()
    store_old_backups()

# start backing up the listed directories; every directory is attempted even
# if an earlier one failed
all_successful = True
for target_directory in target_directories:
    return_val = backup_directory(quote(target_directory))
    if (not _backup_succeeded(return_val)):
        all_successful = False

# write timestamp only if all copying succeeded; without the timestamp the
# next run copies on top of this backup again instead of rotating it
if (all_successful):
    write_timestamp()
else:
    print('ERROR: at least one directory was not backed up completely, timestamp not written!')