Pre-fetch Apple downloads as dmg files

Tags:

In my quest to automate the workflow of managing Macs at work, I wanted a way to download disk image files from Apple ahead of time. Once they are downloaded, the next step will be to integrate with my watched install project from the previous post, and then auto-create the loadsets. I can then do all my management on the radmind server (picking and choosing loadsets ready to go).

The script monitors http://images.apple.com/downloads/macosx/apple/recent.rss and, if the feed has been updated since the last check (the time of the last check is stored in a plist file), it checks the links in the feed for any dmg it can find and downloads each one to /downloaded_dmgs/ (which is easy enough to change in the script source).

The script source is below the fold – or download here

#!/usr/bin/env python
# encoding: utf-8
"""
This script attempts to download any dmg files it can find in Apple's RSS feed of updates

requires BeautifulSoup and feedparser modules

Created by Preston Holmes on 2009-07-15.
preston@ptone.com
Copyright (c) 2009

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""

import sys
import os
import feedparser
import urllib, urllib2
from BeautifulSoup import BeautifulSoup
import plistlib
import re
import pdb
import time

# RSS feed that main() parses to discover recent Apple downloads.
feed_url = 'http://images.apple.com/downloads/macosx/apple/recent.rss'
# Directory the dmg files are saved into; main() creates it when missing.
dmg_location = '/downloaded_dmgs/'
# Plist in which main() persists the 'last_check' timestamp between runs.
prefs_file = '/Library/Preferences/com.ptone.updatefetcher.plist'

# --- progress-reporting state shared by the download helpers ---
# Percentage shown by the most recent progress message (0 = nothing shown).
last_done = 0
# time.time() value recorded when the most recent progress message was written.
ticker = 0
# Details of the item being fetched: main() sets 'entry', get_dmg() adds 'fname'.
current_download = {}
# Text of the most recent progress message; its length is used to erase it.
last_msg = ''
# When True, main() ignores the stored 'last_check' so the feed is always processed.
debug = False

def download_status(block_ct,block_sz,total):
    """Write a throttled progress message to stderr for the current download.

    Intended as the report hook for urllib.urlretrieve, which passes a count
    of blocks transferred so far, a block size in bytes, and the total size
    of the file.

    A new message is written only when progress has advanced by more than
    five percentage points AND more than one second has passed since the
    previous message. The previous message is erased with backspaces first,
    so the status stays on a single terminal line.

    block_ct -- number of blocks transferred so far
    block_sz -- size of one block in bytes
    total    -- total size of the file in bytes; values <= 0 mean the size
                is unknown, in which case nothing is reported
    """
    global last_done, current_download, ticker, last_msg
    # Without a usable total no percentage can be computed; a total of 0
    # would otherwise raise ZeroDivisionError.
    if total <= 0:
        return
    now = time.time()
    # The final block usually overshoots the file size, so cap at 100%.
    done = min((block_ct * block_sz) / float(total) * 100, 100.0)
    if done > last_done + 5 and now - ticker > 1:
        # Floor division keeps the size a whole number of megabytes.
        msg = "downloading %s, %smb %s %% done" % (
            current_download['entry'], total // (1024 * 1024), int(done))
        # Back up over the previous message before writing the new one.
        sys.stderr.write('\b' * len(last_msg))
        sys.stderr.write(msg)
        sys.stderr.flush()
        last_msg = msg
        last_done = done
        ticker = now

def get_dmg_from_iframepage(url):
    """Find dmg URLs on download pages that use an iframe form (iTunes, Safari).

    The page at `url` is searched for an iframe whose src points at
    swdlp.apple.com; that iframe document is then fetched and scanned for
    hidden `downloadURL` inputs whose value ends in "dmg".

    Returns a list of dmg URLs, or False when `url` does not end with '/',
    when no matching iframe is present, or when no dmg input is found.
    """
    if not url.endswith('/'):
        return False

    landing_page = BeautifulSoup(urllib2.urlopen(url).read())
    frame = landing_page.find('iframe', src=re.compile('.*swdlp.apple.com.*'))
    if not frame:
        return False

    # user agent is needed to get the mac version of safari
    request = urllib2.Request(
        frame['src'],
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Version/4.0.1 Safari/530.18',
        },
    )
    form_page = BeautifulSoup(urllib2.urlopen(request).read())
    hidden_inputs = form_page.findAll(
        'input',
        attrs={
            'type': 'hidden',
            'name': 'downloadURL',
            'value': re.compile('.*dmg$'),
        },
    )
    # An empty result is reported as False rather than as an empty list.
    return [tag['value'] for tag in hidden_inputs] or False

def clear_status():
    """Erase the progress message from the terminal and reset progress state.

    If a progress message has been shown (last_done > 0), it is overwritten
    with spaces and the cursor is returned to the start of the line. The
    module-level last_done and last_msg are always reset afterwards.
    """
    global last_done, last_msg
    if last_done > 0:
        width = len(last_msg)
        sys.stderr.write('\b' * width)   # move back over the old message
        sys.stderr.write(' ' * width)    # blank it out
        sys.stderr.write('\r')           # return to the start of the line
        # Flush the stream that was written to, so the line is cleared
        # before anything else is printed.
        sys.stderr.flush()
    last_done = 0
    last_msg = ''
    
def get_dmg(dmg_url):
    """Download one dmg into dmg_location unless it is already there.

    The file name is the last path component of `dmg_url`. The data is
    first written to "<name>.part" and renamed only after the transfer has
    finished, so an interrupted download never leaves a truncated file that
    a later run would report as "already downloaded".

    dmg_url -- URL of the disk image to fetch

    Any exception raised by the download propagates to the caller after the
    partial file has been removed.
    """
    # do fancy clearing of % done
    clear_status()
    fname = dmg_url.split('/')[-1]
    fpath = os.path.join(dmg_location, fname)
    if os.path.exists(fpath):
        print("%s already downloaded" % fname)
        return

    current_download['fname'] = fname
    partial_path = fpath + '.part'
    finished = False
    try:
        urllib.urlretrieve(dmg_url, partial_path, download_status)
        os.rename(partial_path, fpath)
        finished = True
    finally:
        # Covers errors and Ctrl-C alike: never keep a half-written image.
        if not finished and os.path.exists(partial_path):
            os.remove(partial_path)
    clear_status()
    print('downloaded: %s' % current_download['entry'])

def main():
    """Check Apple's downloads feed and fetch every dmg it links to.

    Steps:
      1. Parse feed_url and read the feed's update time.
      2. Compare it with the 'last_check' value stored in prefs_file; if the
         feed has not changed, record the current time and exit.
      3. Otherwise visit each entry's page, look for a "wsidecar" download
         link, and download the dmg it points to (directly, or via the
         iframe download form handled by get_dmg_from_iframepage).
      4. Record the current time as 'last_check' in prefs_file.

    Exits with status 1 when the feed provides no update time (for example
    because it could not be fetched).
    """
    global current_download

    print("============ Script Run %s ============" % time.asctime())
    print("Parsing Feed")
    feed = feedparser.parse(feed_url)

    # A feed that failed to download or parse carries no update time;
    # report that instead of failing with an AttributeError.
    updated_parsed = getattr(feed.feed, 'updated_parsed', None)
    if updated_parsed is None:
        sys.stderr.write("Could not read an update time from %s\n" % feed_url)
        sys.exit(1)
    feed_time = time.mktime(updated_parsed)

    # With no stored value, pretend the last check predates the feed update
    # so that the feed gets processed.
    fallback_check = feed_time - 1000
    if os.path.exists(prefs_file):
        prefs = plistlib.readPlist(prefs_file)
    else:
        prefs = {}
    if debug:
        prefs = {'last_check': fallback_check}
    else:
        # Also covers a prefs file that exists but lacks the key.
        prefs.setdefault('last_check', fallback_check)

    # NOTE(review): feedparser documents updated_parsed as a UTC struct_time.
    # "now" is pushed through the same gmtime -> mktime conversion as
    # feed_time, which presumably keeps the two values comparable - confirm.
    now = time.mktime(time.gmtime())
    if feed_time <= prefs['last_check']:
        print("Feed not updated since last check")
        prefs['last_check'] = now
        plistlib.writePlist(prefs, prefs_file)
        sys.exit()
    print("feed updated %s" % feed.feed.updated)

    if not os.path.exists(dmg_location):
        os.makedirs(dmg_location)

    # loop over the entries
    for entry in feed.entries:
        current_download = {'entry': entry.title}
        # load the entry's page into beautiful soup
        soup = BeautifulSoup(urllib2.urlopen(entry.link).read())
        # find sidecar link; entries without one are skipped silently
        downloads = soup.findAll('a', href=re.compile('http://wsidecar.*'))
        if not downloads:
            continue
        download_url = downloads[0]['href']
        if download_url.endswith('.dmg'):
            # make it a list for compatibility with pages that return multiple dmgs
            dmg_urls = [download_url]
        else:
            # returns a list of urls, or False when nothing was found
            dmg_urls = get_dmg_from_iframepage(download_url)
        if dmg_urls:
            for a_dmg in dmg_urls:
                get_dmg(a_dmg)
        else:
            print("no dmg found in %s" % entry.title)

    prefs['last_check'] = now
    plistlib.writePlist(prefs, prefs_file)
    
# Run the fetcher only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

2 comments ↓

#1 Allister Banks on 01.09.10 at 10:04 pm

Hey there, I was wondering if this process could be used to pull more elusive packages from Apple's site, including the Bonjour 2010 that just came out and Digital Raw 2.7. I know the zzz061 numbers; could one conceivably plug a URL in front or at the end of that code to pull the package in the same way the Software Update service on OS X Server does? Thanks,

Allister

#2 Allister Banks on 01.10.10 at 4:18 pm

Never mind me, I found both in /Library/Updates on a fresh system that hadn’t had those applied yet. I hacked around to find a DigitalRaw.tar on a system with the update already applied, but would still love to figure out how they’re keying the updates to 061 numbers and how to curl them down, if anybody has any input on the matter. Thanks, Allister

Leave a Comment