1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import gzip
import warnings
from cStringIO import StringIO

# Prefer the cElementTree bundled with the standard library; fall back to the
# standalone cElementTree package when xml.etree is unavailable.
try:
    from xml.etree import cElementTree
except ImportError:
    import cElementTree
iterparse = cElementTree.iterparse

import Errors
28
29
30
32
34
35
class MDParser:
    """Iterator over the package entries of one repodata XML file.

    The root tag of the file selects the entry class used to wrap every
    <package> node: PrimaryEntry, FilelistsEntry or OtherEntry.

    Attributes:
        total: the root element's "packages" attribute as an int (0 when the
            attribute is absent).
        count: number of entries returned by next() so far.

    NOTE(review): the class and method header lines were missing from the
    extracted source. The names used here are taken from the
    ``MDParser(sys.argv[1])`` call and the ``for pkg in parser`` loop in
    test(); confirm them against the upstream file.
    """

    def __init__(self, filename):
        """Open *filename* and read its root element.

        A name ending in '.gz' is opened through gzip, anything else as a
        plain file.

        Raises:
            ValueError: if the root tag is not a known repodata type.
        """
        # Map each known repodata root tag to the class wrapping its packages.
        handlers = {
            '{http://linux.duke.edu/metadata/common}metadata': PrimaryEntry,
            '{http://linux.duke.edu/metadata/filelists}filelists': FilelistsEntry,
            '{http://linux.duke.edu/metadata/other}otherdata': OtherEntry,
        }

        self.total = None
        self.count = 0
        self._handlercls = None

        if filename.endswith('.gz'):
            fh = gzip.open(filename, 'r')
        else:
            # Bytes, so the XML parser applies the document's own encoding.
            fh = open(filename, 'rb')
        self._fh = fh

        try:
            self.reader = iter(iterparse(fh, events=('start', 'end')))
            # The first event is the 'start' of the root element, which
            # carries the metadata type and the advertised package count.
            event, elem = next(self.reader)
            self._handlercls = handlers.get(elem.tag, None)
            if not self._handlercls:
                raise ValueError('Unknown repodata type "%s" in %s' % (
                    elem.tag, filename))
            self.total = int(elem.get('packages', 0))
        except Exception:
            # Do not leak the file handle when the file cannot be used.
            fh.close()
            raise

    def __iter__(self):
        """Return the parser itself; it implements the iterator protocol."""
        return self

    def next(self):
        """Return the entry for the next complete <package> element.

        Raises:
            StopIteration: when the document has no further packages.
        """
        for event, elem in self.reader:
            # A package is only fully populated once its 'end' event arrives.
            if event == 'end' and elem.tag.endswith('package'):
                self.count += 1
                return self._handlercls(elem)
        # Document exhausted: release the underlying file.
        self._fh.close()
        raise StopIteration

    # Python 3 spelling of the iterator protocol.
    __next__ = next
69
70
class BaseEntry:
    """Read-only, dict-like view of the properties of one package node.

    Subclasses fill the ``_p`` dictionary while walking the XML element.

    NOTE(review): the extracted source had lost this class header, the bodies
    of __init__ and __getitem__, and the header lines of keys, values, __str__
    and _bn. They are restored from the way the visible code uses them
    (``BaseEntry.__init__(self, elem)``, ``self.keys()``, ``self[k]``,
    ``self._bn(...)``, ``print pkg``); confirm against the upstream file.
    Two further lines that were missing between has_key and __str__ could
    not be recovered and are not reproduced here.
    """

    def __init__(self, elem):
        """Create the empty property dictionary; *elem* is not read here."""
        self._p = {}

    def __getitem__(self, k):
        """Return the property stored under *k* (KeyError if absent)."""
        return self._p[k]

    def keys(self):
        """Return the property names as a list."""
        return list(self._p)

    def values(self):
        """Return the property values as a list."""
        return list(self._p.values())

    def has_key(self, k):
        """Deprecated membership test; warns, then reports ``k in self._p``."""
        warnings.warn('has_key() will go away in a future version of Yum.\n',
                      Errors.YumFutureDeprecationWarning, stacklevel=2)
        return k in self._p

    def __str__(self):
        """Return the properties as 'key=value' lines sorted by key.

        On Python 2 the result is a UTF-8 encoded byte string; on Python 3 it
        is text.
        """
        text = u''.join([u'%s=%s\n' % (k, self[k])
                         for k in sorted(self.keys())])
        if str is bytes:
            # Python 2: __str__ has to hand back bytes, not unicode.
            return text.encode('utf8')
        return text

    def _bn(self, qn):
        """Return *qn* without its '{namespace}' prefix, if it has one."""
        if '}' not in qn:
            return qn
        return qn.split('}')[1]

    def _prefixprops(self, elem, prefix):
        """Return elem's attributes keyed as '<prefix>_<local attribute name>'."""
        ret = {}
        for key, value in elem.attrib.items():
            ret[prefix + '_' + self._bn(key)] = value
        return ret
110
class PrimaryEntry(BaseEntry):
    """Package entry built from a <package> node of primary metadata.

    Besides the properties kept in ``_p`` the entry exposes:
        prco:  dict mapping 'provides'/'requires'/'conflicts'/'obsoletes' to
               a list of attribute dicts, one per child of that node.
        files: dict mapping each listed path to its 'type' attribute
               ('file' when the attribute is absent).
    """

    def __init__(self, elem):
        """Collect the properties of *elem*, then clear it to free memory.

        Raises:
            KeyError: if the package has no <checksum> child, because pkgId
                is taken from the checksum text.
        """
        BaseEntry.__init__(self, elem)

        p = self._p

        self.prco = {}
        self.files = {}

        for child in elem:
            name = self._bn(child.tag)
            if name in ('name', 'arch', 'summary', 'description', 'url',
                        'packager'):
                p[name] = child.text

            elif name == 'version':
                # The version attributes are stored under their own names.
                p.update(child.attrib)

            elif name in ('time', 'size'):
                p.update(self._prefixprops(child, name))

            elif name in ('checksum', 'location'):
                p.update(self._prefixprops(child, name))
                p[name + '_value'] = child.text
                # Make sure 'location_base' always exists, even when the
                # <location> node carried no attribute that maps to it.
                if name == 'location' and 'location_base' not in p:
                    p['location_base'] = None

            elif name == 'format':
                self.setFormat(child)

        p['pkgId'] = p['checksum_value']
        elem.clear()

    def setFormat(self, elem):
        """Read the children of the <format> node *elem* into this entry.

        NOTE(review): the header line of this method was missing from the
        extracted source; name and argument follow the
        ``self.setFormat(child)`` call in __init__.
        """
        p = self._p

        for child in elem:
            name = self._bn(child.tag)

            if name in ('license', 'vendor', 'group', 'buildhost',
                        'sourcerpm'):
                p[name] = child.text

            elif name in ('provides', 'requires', 'conflicts',
                          'obsoletes'):
                self.prco[name] = self.getPrco(child)

            elif name == 'header-range':
                p.update(self._prefixprops(child, 'rpm_header'))

            elif name == 'file':
                file_type = child.get('type', 'file')
                path = child.text
                self.files[path] = file_type

    def getPrco(self, elem):
        """Return the attribute dict of every child of *elem* as a list."""
        return [child.attrib for child in elem]
173
174
class FilelistsEntry(BaseEntry):
    """Package entry built from a <package> node of filelists metadata.

    ``files`` maps each listed path to its 'type' attribute ('file' when the
    attribute is absent); the 'pkgId' property is the node's 'pkgid'
    attribute.
    """

    def __init__(self, elem):
        """Collect the file list of *elem*, then clear it to free memory.

        Raises:
            KeyError: if *elem* has no 'pkgid' attribute.
        """
        BaseEntry.__init__(self, elem)
        self._p['pkgId'] = elem.attrib['pkgid']
        self.files = {}
        for child in elem:
            # Only <file> children contribute; anything else is skipped.
            if self._bn(child.tag) != 'file':
                continue
            self.files[child.text] = child.get('type', 'file')
        elem.clear()
187
class OtherEntry(BaseEntry):
    """Package entry built from a <package> node of 'other' metadata.

    The 'changelog' property is a list with one dict per <changelog> child:
    the child's attributes plus its text under the key 'value'. The 'pkgId'
    property is the node's 'pkgid' attribute.
    """

    def __init__(self, elem):
        """Collect the changelog of *elem*, then clear it to free memory.

        Raises:
            KeyError: if *elem* has no 'pkgid' attribute.
        """
        BaseEntry.__init__(self, elem)
        self._p['pkgId'] = elem.attrib['pkgid']
        self._p['changelog'] = []
        for child in elem:
            name = self._bn(child.tag)
            if name == 'changelog':
                # Copy first: adding 'value' to child.attrib itself would
                # modify the attributes of the parsed element.
                entry = dict(child.attrib)
                entry['value'] = child.text
                self._p['changelog'].append(entry)
        elem.clear()
200
201
202
def test():
    """Parse the file named by sys.argv[1] and print every package entry.

    Finishes with a summary line comparing the number of entries read with
    the count advertised by the file.

    NOTE(review): the header line of this function was missing from the
    extracted source; its name follows the ``test()`` call below.
    """
    import sys

    parser = MDParser(sys.argv[1])

    # Single-argument print calls behave the same on Python 2 and Python 3.
    for pkg in parser:
        print('-' * 40)
        print(pkg)

    print('read: %s packages (%s suggested)' % (parser.count, parser.total))


if __name__ == '__main__':
    test()
216