In my quest to automate the workflow of managing Macs at work, I wanted a way to download disk image files from Apple ahead of time. Once they are downloaded, the next step will be to integrate with my watched install project from the previous post, and then auto-create the loadsets. I can then do all my management on the radmind server (picking and choosing loadsets ready to go).
The script monitors http://images.apple.com/downloads/macosx/apple/recent.rss. If the feed has been updated since the last check (the time of the last check is stored in a plist file), the script checks the feed links for any dmg it can find and downloads it to /downloaded_dmgs/ (which is easy enough to change in the script source).
The script source is below the fold – or download here
#!/usr/bin/env python
# encoding: utf-8
"""
This script attempts to download any dmg files it can find in Apple's RSS feed of updates
requires BeautifulSoup and feedparser modules
Created by Preston Holmes on 2009-07-15.
preston@ptone.com
Copyright (c) 2009
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
import sys
import os
import feedparser
import urllib, urllib2
from BeautifulSoup import BeautifulSoup
import plistlib
import re
import pdb
import time
# --- Configuration ---------------------------------------------------------
# RSS feed that is checked for new downloads.
feed_url = 'http://images.apple.com/downloads/macosx/apple/recent.rss'
# Directory the disk images are saved into (created by main() if missing).
dmg_location = '/downloaded_dmgs/'
# Property list that remembers the time of the last feed check between runs.
prefs_file = '/Library/Preferences/com.ptone.updatefetcher.plist'
# When True, main() replaces the stored last-check time with one older than
# the feed, so the feed is always treated as updated.
debug = False

# --- Progress-display state shared by the download helpers -----------------
# Details of the download in progress ('entry' title and 'fname' file name).
current_download = {}
# Text of the status message most recently written to stderr.
last_msg = ''
# Percentage shown in the most recent status message.
last_done = 0
# time.time() value at which the most recent status message was written.
ticker = 0
def download_status(block_ct, block_sz, total):
    """Show download progress on stderr; meant to be used as a urlretrieve hook.

    The hook is passed three arguments:
    block_ct -- count of blocks transferred so far
    block_sz -- block size in bytes
    total    -- total size of the file in bytes

    The status line is rewritten in place, but only once progress has moved
    on by more than 5 percentage points and more than one second has passed
    since the previous message. Nothing is shown when the total size is not
    a positive number, because no percentage can be computed.
    """
    global last_done, ticker, last_msg
    if total <= 0:
        # Unknown size (urlretrieve documents -1 for servers that send no
        # length) or an empty file: bail out instead of dividing by zero or
        # computing a negative percentage.
        return
    now = time.time()
    # The final block is usually only partly filled, so block_ct * block_sz
    # can overshoot the real size; cap the figure at 100%.
    done = min((block_ct * block_sz) / float(total) * 100, 100.0)
    if done > last_done + 5 and now - ticker > 1:
        # Fall back to the file name when no entry title has been recorded.
        label = current_download.get('entry', current_download.get('fname', '?'))
        msg = "downloading %s, %smb %s %% done" % (
            label, total // (1024 * 1024), int(done))
        # Backspace over the previous message so the new one replaces it.
        sys.stderr.write('\b' * len(last_msg))
        sys.stderr.write(msg)
        sys.stderr.flush()
        last_msg = msg
        last_done = done
        ticker = now
def _read_url(target):
    """Return the body of target (a url string or urllib2.Request), closing the response."""
    response = urllib2.urlopen(target)
    try:
        return response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()


def get_dmg_from_iframepage(url):
    """Sleuth out dmg urls from pages that have an iframe download form, like iTunes and Safari.

    url -- address of the download page; only urls ending in '/' are examined.

    Returns a list with the value of every hidden ``downloadURL`` form input
    on the iframe page that ends in ``.dmg``, or False when the url is not a
    page url, the page has no matching iframe, or no such input is found.
    """
    if not url.endswith('/'):
        return False
    soup = BeautifulSoup(_read_url(url))
    # Dots are escaped so that only the literal host name matches.
    iframe = soup.find('iframe', src=re.compile(r'.*swdlp\.apple\.com.*'))
    if not iframe:
        return False
    # user agent is needed to get the mac version of safari
    user_agent = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Version/4.0.1 Safari/530.18'
    req = urllib2.Request(iframe['src'], headers={'User-Agent': user_agent})
    soup = BeautifulSoup(_read_url(req))
    dmgs = soup.findAll('input', attrs={
        'type': 'hidden',
        'name': 'downloadURL',
        'value': re.compile(r'.*\.dmg$'),
    })
    if not dmgs:
        return False
    return [field['value'] for field in dmgs]
def clear_status():
    """Erase the in-place progress message from stderr and reset the progress state.

    If a progress message has been shown (last_done > 0), the message is
    backspaced over, blanked out with spaces, and the cursor is returned to
    the start of the line. last_done and last_msg are then reset so the next
    download starts with a clean slate.
    """
    global last_done, last_msg
    if last_done > 0:
        width = len(last_msg)
        sys.stderr.write('\b' * width)
        sys.stderr.write(' ' * width)
        sys.stderr.write('\r')
        # The message lives on stderr, so that is the stream to flush.
        sys.stderr.flush()
    last_done = 0
    last_msg = ''
def get_dmg(dmg_url):
    """Download dmg_url into dmg_location unless a file of that name is already there.

    dmg_url -- url of the disk image; its last path component is used as the
               file name.

    The image is first saved under a temporary ``.part`` name and renamed
    only after the transfer has finished, so an interrupted download is never
    mistaken for a complete one on a later run. Errors raised by the transfer
    or the rename propagate to the caller after the partial file is removed.
    """
    # do fancy clearing of % done
    clear_status()
    fname = dmg_url.split('/')[-1]
    fpath = os.path.join(dmg_location, fname)
    if os.path.exists(fpath):
        print("%s already downloaded" % fname)
        return
    # save the dmg
    current_download['fname'] = fname
    partial_path = fpath + '.part'
    finished = False
    try:
        urllib.urlretrieve(dmg_url, partial_path, download_status)
        os.rename(partial_path, fpath)
        finished = True
    finally:
        if not finished and os.path.exists(partial_path):
            # Do not leave a truncated image behind.
            os.remove(partial_path)
        # Wipe the progress line whether or not the transfer succeeded.
        clear_status()
    # Fall back to the file name when no entry title has been recorded.
    print('downloaded: %s' % current_download.get('entry', fname))
def _find_dmg_urls(page_url):
    """Return the list of dmg urls offered by the download page at page_url (empty if none)."""
    # load link with urllib2 into beautiful soup
    response = urllib2.urlopen(page_url)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        response.close()
    # find sidecar link
    downloads = soup.findAll('a', href=re.compile('http://wsidecar.*'))
    if not downloads:
        return []
    download_url = downloads[0]['href']
    # determine if link is dmg
    if download_url.endswith('.dmg'):
        # a list, for compatibility with pages that return multiple dmgs
        return [download_url]
    # get_dmg_from_iframepage returns False when it finds nothing.
    return get_dmg_from_iframepage(download_url) or []


def main():
    """Check the feed and download every dmg linked from its entries.

    The feed's update time is compared with the ``last_check`` value stored
    in prefs_file. If the feed has not changed, the check time is refreshed
    and the script exits. Otherwise each entry's page is searched for dmg
    links and every dmg found is passed to get_dmg().

    An I/O error while handling one entry is reported and the remaining
    entries are still processed; in that case ``last_check`` is left
    untouched so the next run tries again (get_dmg skips files that already
    exist). Exits with status 1 when the feed carries no update time.
    """
    global current_download
    print("============ Script Run %s ============" % time.asctime())
    print("Parsing Feed")
    feed = feedparser.parse(feed_url)
    updated_parsed = getattr(feed.feed, 'updated_parsed', None)
    if not updated_parsed:
        # Without a timestamp there is nothing to compare against; this is
        # presumably what an unreachable or malformed feed looks like.
        print("Could not read an update time from %s" % feed_url)
        sys.exit(1)
    feed_time = time.mktime(updated_parsed)
    if os.path.exists(prefs_file):
        prefs = plistlib.readPlist(prefs_file)
    else:
        prefs = {'last_check': feed_time - 1000}
    if debug:
        prefs = {'last_check': feed_time - 1000}
    # NOTE(review): feed_time and now both come from passing a struct_time
    # through mktime, so they are on the same scale as each other and as
    # previously stored last_check values - confirm before changing either.
    now = time.mktime(time.gmtime())
    # A plist without the key is treated like a missing plist.
    if feed_time <= prefs.get('last_check', feed_time - 1000):
        print("Feed not updated since last check")
        prefs['last_check'] = now
        plistlib.writePlist(prefs, prefs_file)
        sys.exit()
    print("feed updated %s" % feed.feed.updated)
    if not os.path.exists(dmg_location):
        os.makedirs(dmg_location)
    failures = 0
    # loop over the entries
    for entry in feed.entries:
        current_download = {'entry': entry.title}
        try:
            dmg_urls = _find_dmg_urls(entry.link)
            for a_dmg in dmg_urls:
                get_dmg(a_dmg)
        except IOError:
            # urllib/urllib2 report network and HTTP problems as IOError
            # subclasses; keep going so one bad entry cannot block the rest.
            failures += 1
            print("error fetching %s: %s" % (entry.title, sys.exc_info()[1]))
            continue
        if not dmg_urls:
            print("no dmg found in %s" % entry.title)
    if failures:
        print("%d entries failed; last check time left unchanged so they are retried" % failures)
        return
    prefs['last_check'] = now
    plistlib.writePlist(prefs, prefs_file)
# Run the fetcher only when this file is executed as a script.
if __name__ == "__main__":
    main()
2 comments ↓
Hey there, I was wondering if this process could be used to pull more elusive packages from Apple's site, including the Bonjour 2010 that just came out and Digital Raw 2.7. I know the zzz061 numbers; could one conceivably plug a URL in front or at the end of that code to pull the package in the same way the Software Update service on OS X Server does? Thanks,
Allister
Never mind me, I found both in /Library/Updates on a fresh system that hadn’t had those applied yet. I hacked around to find a DigitalRaw.tar on a system with the update already applied, but would still love to figure out how they’re keying the updates to 061 numbers and how to curl them down if anybody had any input on the matter. Thanks, Allister