Package yum :: Module mdparser
[hide private]
[frames | no frames]

Source Code for Module yum.mdparser

  1   
  2  # This program is free software; you can redistribute it and/or modify 
  3  # it under the terms of the GNU General Public License as published by 
  4  # the Free Software Foundation; either version 2 of the License, or 
  5  # (at your option) any later version. 
  6  # 
  7  # This program is distributed in the hope that it will be useful, 
  8  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  9  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 10  # GNU Library General Public License for more details. 
 11  # 
 12  # You should have received a copy of the GNU General Public License 
 13  # along with this program; if not, write to the Free Software 
 14  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
 15  # Copyright 2005 Duke University  
 16   
 17  import gzip 
 18  try: 
 19      from xml.etree import cElementTree 
 20  except ImportError: 
 21      import cElementTree 
 22  iterparse = cElementTree.iterparse 
 23   
 24  from cStringIO import StringIO 
 25  import warnings 
 26   
 27  import Errors 
 28   
 29  #TODO: document everything here 
 30   
class MDParser:
    """Iterate over the <package> elements of a repodata XML file.

    The tag of the file's root element selects the entry class
    (PrimaryEntry, FilelistsEntry or OtherEntry).  Every iteration step
    returns one instance of that class, built from the next completed
    element whose tag ends in 'package'.

    Attributes:
        total: value of the root element's 'packages' attribute as an int
            (0 when the attribute is absent).
        count: number of entries handed out by the iterator so far.
        reader: the underlying iterparse event iterator.
    """

    def __init__(self, filename):
        """Open *filename* and read its root element.

        A name ending in '.gz' is opened through gzip, anything else as a
        plain file.

        Raises:
            ValueError: the root element's tag is not a known repodata type.
        """
        # Set up mapping of meta types to handler classes
        handlers = {
            '{http://linux.duke.edu/metadata/common}metadata': PrimaryEntry,
            '{http://linux.duke.edu/metadata/filelists}filelists': FilelistsEntry,
            '{http://linux.duke.edu/metadata/other}otherdata': OtherEntry,
        }

        self.total = None
        self.count = 0
        self._handlercls = None

        # Hand the XML parser raw bytes; it does its own decoding.
        if filename.endswith('.gz'):
            fh = gzip.open(filename, 'rb')
        else:
            fh = open(filename, 'rb')
        self._fh = fh

        try:
            # Read in type, set package node handler and get total number
            # of packages.  The first event is the 'start' of the root.
            self.reader = iter(iterparse(fh, events=('start', 'end')))
            event, elem = next(self.reader)
            self._handlercls = handlers.get(elem.tag, None)
            if not self._handlercls:
                raise ValueError('Unknown repodata type "%s" in %s' % (
                    elem.tag, filename))
            # Get the total number of packages
            self.total = int(elem.get('packages', 0))
        except Exception:
            # Do not leave the file open when construction fails.
            fh.close()
            raise

    def __iter__(self):
        """Return self; the parser is its own iterator."""
        return self

    def next(self):
        """Return the entry for the next completed package element.

        Raises:
            StopIteration: the event stream is exhausted.
        """
        for event, elem in self.reader:
            if event == 'end' and elem.tag.endswith('package'):
                self.count += 1
                return self._handlercls(elem)
        # Nothing left to parse: release the file before signalling the end.
        self._fh.close()
        raise StopIteration

    # Same method under the name the newer iterator protocol looks up.
    __next__ = next
69 70
class BaseEntry:
    """Read-only, dict-like view over the properties of one package entry.

    Subclasses fill the private dict ``_p``; this class exposes it through
    item access, membership tests, iteration, keys() and values().
    """

    def __init__(self, elem):
        """Create an entry with no properties; *elem* is not read here."""
        self._p = {}

    def __getitem__(self, k):
        """Return the property stored under *k* (KeyError if missing)."""
        return self._p[k]

    def __contains__(self, k):
        """Support ``k in entry`` with a direct dict lookup.

        This is the replacement for the deprecated has_key(); without it
        the ``in`` operator would fall back to scanning __iter__.
        """
        return k in self._p

    def keys(self):
        """Return the property names."""
        return self._p.keys()

    def values(self):
        """Return the property values."""
        return self._p.values()

    def has_key(self, k):
        """Deprecated membership test; emits a warning, then answers."""
        warnings.warn('has_key() will go away in a future version of Yum.\n',
                      Errors.YumFutureDeprecationWarning, stacklevel=2)
        return k in self._p

    def __iter__(self):
        """Iterate over the property names."""
        return iter(self._p)

    def __str__(self):
        """Return 'key=value' lines, sorted by key, encoded as UTF-8."""
        lines = [u'%s=%s\n' % (k, self[k]) for k in sorted(self.keys())]
        return u''.join(lines).encode('utf8')

    def _bn(self, qn):
        """Return the part of *qn* after a '{namespace}' prefix, if any."""
        if '}' not in qn:
            return qn
        return qn.split('}')[1]

    def _prefixprops(self, elem, prefix):
        """Return elem's attributes keyed as '<prefix>_<base attribute name>'."""
        ret = {}
        for key, value in elem.attrib.items():
            ret[prefix + '_' + self._bn(key)] = value
        return ret
110
class PrimaryEntry(BaseEntry):
    """Entry built from one package element of the primary metadata.

    Attributes:
        prco: maps 'provides', 'requires', 'conflicts' and 'obsoletes' to
            the list returned by getPrco() for that element.
        files: maps each listed file path to its 'type' attribute
            ('file' when the attribute is absent).
    """

    def __init__(self, elem):
        """Collect the properties of *elem*, then clear it.

        Raises:
            KeyError: no checksum child was seen, so there is no
                'checksum_value' to use as 'pkgId'.
        """
        BaseEntry.__init__(self, elem)
        # Avoid excess typing :)
        p = self._p

        self.prco = {}
        self.files = {}

        for child in elem:
            name = self._bn(child.tag)
            if name in ('name', 'arch', 'summary', 'description', 'url',
                        'packager'):
                p[name] = child.text

            elif name == 'version':
                # The version attributes are stored under their own names.
                p.update(child.attrib)

            elif name in ('time', 'size'):
                p.update(self._prefixprops(child, name))

            elif name in ('checksum', 'location'):
                p.update(self._prefixprops(child, name))
                p[name + '_value'] = child.text
                # Make sure 'location_base' always exists once a location
                # was seen, even when the element carries no such attribute.
                if name == 'location' and 'location_base' not in p:
                    p['location_base'] = None

            elif name == 'format':
                self.setFormat(child)

        p['pkgId'] = p['checksum_value']
        # Drop the element's content now that it has been copied out.
        elem.clear()

    def setFormat(self, elem):
        """Collect the properties found under a 'format' element."""
        # Avoid excessive typing :)
        p = self._p

        for child in elem:
            name = self._bn(child.tag)

            if name in ('license', 'vendor', 'group', 'buildhost',
                        'sourcerpm'):
                p[name] = child.text

            elif name in ('provides', 'requires', 'conflicts',
                          'obsoletes'):
                self.prco[name] = self.getPrco(child)

            elif name == 'header-range':
                p.update(self._prefixprops(child, 'rpm_header'))

            elif name == 'file':
                self.files[child.text] = child.get('type', 'file')

    def getPrco(self, elem):
        """Return the attribute dicts of *elem*'s children, in order."""
        return [child.attrib for child in elem]
173 174
class FilelistsEntry(BaseEntry):
    """Entry built from one package element of the filelists metadata.

    'pkgId' is taken from the element's 'pkgid' attribute, and ``files``
    maps each file path to its 'type' attribute ('file' when absent).
    """

    def __init__(self, elem):
        """Record the package id and file list of *elem*, then clear it."""
        BaseEntry.__init__(self, elem)
        self._p['pkgId'] = elem.attrib['pkgid']
        self.files = {}
        for node in elem:
            # Only 'file' children contribute; everything else is skipped.
            if self._bn(node.tag) != 'file':
                continue
            self.files[node.text] = node.get('type', 'file')
        elem.clear()
187
class OtherEntry(BaseEntry):
    """Entry built from one package element of the 'other' metadata.

    'pkgId' is taken from the element's 'pkgid' attribute.  'changelog' is
    a list with one dict per changelog child: that child's attributes plus
    a 'value' key holding its text.
    """

    def __init__(self, elem):
        """Record the package id and changelog of *elem*, then clear it."""
        BaseEntry.__init__(self, elem)
        props = self._p
        props['pkgId'] = elem.attrib['pkgid']
        log = props['changelog'] = []
        for node in elem:
            if self._bn(node.tag) != 'changelog':
                continue
            # The child's own attribute dict is reused as the entry.
            record = node.attrib
            record['value'] = node.text
            log.append(record)
        elem.clear()
200 201 202
def test():
    """Print every package entry of the repodata file named in sys.argv[1].

    Each entry is preceded by a 40-dash separator.  A final line reports
    how many entries were read and the total the file itself announced.
    Exits with status 1 and a usage message when no file name is given.
    """
    import sys

    if len(sys.argv) < 2:
        sys.stderr.write('usage: mdparser.py REPODATA_XML\n')
        sys.exit(1)

    parser = MDParser(sys.argv[1])

    # Single-argument print(...) behaves the same as the print statement.
    for pkg in parser:
        print('-' * 40)
        print(pkg)

    print('read: %s packages (%s suggested)' % (parser.count, parser.total))


if __name__ == '__main__':
    test()