Scrawls from Preston...

Powered by Pelican.

Thu 16 July 2009

Pre-fetch Apple downloads as dmg files

In my quest to automate the workflow of managing Macs at work, I wanted a way to download disk image files from Apple ahead of time. Once they are downloaded, the next step will be to integrate with my watched install project from the previous post, and then auto-create the loadsets. I can then do all my management on the radmind server (picking and choosing loadsets ready to go).

The script monitors http://images.apple.com/downloads/macosx/apple/recent.rss and, if the feed has been updated since the last check (the time of the last check is stored in a plist file), it follows the feed links looking for any dmg files it can find and downloads them to /downloaded_dmgs/ (which is easy enough to change in the script source).

The script source is below the fold - or download [here](/downloads/get_apple_updates.py.zip)

#!/usr/bin/env python
# encoding: utf-8
"""
This script attempts to download any dmg files it can find in Apple's RSS feed of updates

requires BeautifulSoup and feedparser modules

Created by Preston Holmes on 2009-07-15.
preston@ptone.com
Copyright (c) 2009

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""

# NOTE: this is a Python 2 script (urllib2, BeautifulSoup 3, plistlib.readPlist).
# print(...) is always called with a single argument below, which behaves
# exactly like the Python 2 print statement.

import sys
import os
import feedparser
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup
import plistlib
import re
import pdb
import time

# RSS feed listing Apple's most recent downloads.
feed_url = 'http://images.apple.com/downloads/macosx/apple/recent.rss'
# Directory the disk images are saved into (created on demand by main()).
dmg_location = '/downloaded_dmgs/'
# Plist that remembers when the feed was last checked.
prefs_file = '/Library/Preferences/com.ptone.updatefetcher.plist'

# Progress-reporting state shared between download_status(), clear_status(),
# get_dmg() and main().
last_done = 0          # percentage shown by the last progress message
ticker = 0             # time.time() of the last progress message
current_download = {}  # 'entry' (feed title) and 'fname' of the active download
last_msg = ''          # last progress message written, used to erase it
debug = False


def download_status(block_ct, block_sz, total):
    """Report download progress on stderr; reporthook for urllib.urlretrieve.

    The hook will be passed three arguments; a count of blocks transferred
    so far, a block size in bytes, and the total size of the file.

    A new message is written only when progress has advanced by more than
    5 percentage points and more than one second has passed since the
    previous message.  The previous message is overwritten in place using
    backspaces.
    """
    global last_done, ticker, last_msg
    if total <= 0:
        # urlretrieve passes -1 when the size is unknown; a percentage
        # cannot be computed (and 0 would divide by zero), so stay silent.
        return
    now = time.time()
    # The final block is usually only partly filled, so cap at 100%.
    done = min(((block_ct * block_sz) / float(total)) * 100, 100.0)
    if done > last_done + 5 and now - ticker > 1:
        msg = "downloading %s, %smb %s %% done" % (
            current_download['entry'], total // (1024 * 1024), int(done))
        # back up over the previous message before writing the new one
        sys.stderr.write(''.ljust(len(last_msg), '\b'))
        sys.stderr.write(msg)
        last_msg = msg
        sys.stderr.flush()
        last_done = done
        ticker = now


def get_dmg_from_iframepage(url):
    """Sleuths out the dmg url from pages that have an iFrame download form like iTunes and Safari

    Returns a list of dmg URLs taken from the hidden ``downloadURL`` form
    inputs of the iframe page, or False when ``url`` does not end in '/',
    the page has no swdlp.apple.com iframe, or no dmg input is found.
    """
    # pdb.set_trace()
    if not url.endswith('/'):
        return False
    soup = BeautifulSoup(urllib2.urlopen(url).read())
    iframe = soup.find('iframe', src=re.compile('.*swdlp.apple.com.*'))
    if not iframe:
        return False
    iframe_url = iframe['src']
    # user agent is needed to get the mac version of safari
    user_agent = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Version/4.0.1 Safari/530.18'
    headers = {'User-Agent': user_agent}
    req = urllib2.Request(iframe_url, headers=headers)
    page_data = urllib2.urlopen(req).read()
    soup = BeautifulSoup(page_data)
    dmgs = soup.findAll('input', attrs={'type': 'hidden',
                                        'name': 'downloadURL',
                                        'value': re.compile('.*dmg$')})
    if dmgs:
        return [i['value'] for i in dmgs]
    return False


def clear_status():
    """Erase the progress message from the terminal and reset progress state."""
    global last_done, last_msg
    if last_done > 0:
        # back up over the message, blank it out, then return to column 0
        sys.stderr.write(''.ljust(len(last_msg), '\b'))
        sys.stderr.write(''.ljust(len(last_msg), ' '))
        sys.stderr.write('\r')
        # The message was written to stderr, so that is the stream that
        # must be flushed; stdout is flushed as well, as before.
        sys.stderr.flush()
        sys.stdout.flush()
    last_done = 0
    last_msg = ''


def get_dmg(dmg_url):
    """Download ``dmg_url`` into ``dmg_location`` unless it is already there.

    The file name is the last path component of the URL.  The data is first
    written to ``<name>.part`` and renamed only after the transfer finished,
    so an interrupted download is never mistaken for a complete one on the
    next run.  Any error raised by the transfer is re-raised after the
    partial file has been removed.
    """
    global current_download
    # do fancy clearing of % done
    clear_status()
    fname = dmg_url.split('/')[-1]
    fpath = os.path.join(dmg_location, fname)
    if os.path.exists(fpath):
        print("%s already downloaded" % fname)
    else:
        # save the dmg
        current_download['fname'] = fname
        partial_path = fpath + '.part'
        try:
            urllib.urlretrieve(dmg_url, partial_path, download_status)
        except BaseException:
            # also covers Ctrl-C: never leave a truncated image behind
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        os.rename(partial_path, fpath)
        clear_status()
        print('downloaded: %s' % current_download['entry'])


def main():
    """Check the feed and download every dmg linked from its entries.

    Exits early (via sys.exit) when the feed has not been updated since the
    time stored in ``prefs_file``, or when the feed carries no update time.
    On completion the check time is written back to ``prefs_file``.
    """
    print("============ Script Run %s ============" % time.asctime())
    prefs = {}
    global current_download
    print("Parsing Feed")
    feed = feedparser.parse(feed_url)
    feed_updated = getattr(feed.feed, 'updated_parsed', None)
    if feed_updated is None:
        # feedparser yields no update time when the feed could not be
        # fetched or parsed; report it instead of dying with a traceback.
        print("Could not read an update time from %s" % feed_url)
        sys.exit(1)
    feed_time = time.mktime(feed_updated)
    if os.path.exists(prefs_file):
        prefs = plistlib.readPlist(prefs_file)
    else:
        # first run: pretend the last check predates the feed update
        prefs = {'last_check': feed_time - 1000}

    if debug:
        prefs = {'last_check': feed_time - 1000}

    # NOTE(review): both feed_time and now pass a UTC struct_time through
    # mktime(), so they are comparable with each other even though neither
    # is a true epoch timestamp.
    now = time.mktime(time.gmtime())
    # print feed_time
    # print now
    if feed_time <= prefs['last_check']:
        print("Feed not updated since last check")
        prefs['last_check'] = now
        plistlib.writePlist(prefs, prefs_file)
        sys.exit()
    else:
        print("feed updated %s" % feed.feed.updated)
    if not os.path.exists(dmg_location):
        os.makedirs(dmg_location)
    # loop over the entries
    for entry in feed.entries:
        current_download = {'entry': entry.title}
        dmg_found = False
        url = entry.link
        # load link with urllib2 into beautiful soup
        soup = BeautifulSoup(urllib2.urlopen(url).read())
        # find sidecar link
        downloads = soup.findAll('a', href=re.compile('http://wsidecar.*'))
        if downloads:
            download_url = downloads[0]['href']
            # determine if link is dmg
            if download_url.endswith('.dmg'):
                dmg_found = True
                # make it a list for compatibility with pages that return multiple dmgs
                dmg_urls = [download_url]
            else:
                dmg_found = dmg_urls = get_dmg_from_iframepage(download_url)

            if dmg_found:
                for a_dmg in dmg_urls:
                    get_dmg(a_dmg)
            else:
                print("no dmg found in %s" % entry.title)

    prefs['last_check'] = now
    plistlib.writePlist(prefs, prefs_file)


if __name__ == '__main__':
    main()


http://ptone.com/dablog