5 # This file is part of CSSTidy.
7 # CSSTidy is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # CSSTidy is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with CSSTidy; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 # @license http://opensource.org/licenses/gpl-license.php GNU Public License
23 # @author Dj Gilcrease (digitalxero at gmail dot com) 2005-2006
27 from optimizer import CSSOptimizer
28 from output import CSSPrinter
30 from tools import SortedDict
# CSS parser front end: the methods below scan a stylesheet character by
# character, collect the declarations in _raw_css and hand them to the
# optimizer, whose result is stored in _optimized_css.
# The two mappings are assigned without `self.`, so they appear to be
# class-level attributes (indentation is lost in this listing -- TODO confirm).
# NOTE(review): class-level mutable objects are shared by all instances until
# an instance rebinds them; parse() rebinds _optimized_css, but no visible line
# rebinds _raw_css.
32 class CSSTidy(object):
35 _raw_css = SortedDict()
36 _optimized_css = SortedDict()
47 #Saves the CSS charset (@charset)
50 #Saves all @import URLs
56 #Contains the version of csstidy
62 # Saves the parser-status.
68 # - instr = in string (started at " or ' or ( )
69 # - ic = in comment (ignore everything)
73 #Saves the current at rule (@media)
76 #Saves the current selector
79 #Saves the current property
82 #Saves the position of , in selectors
85 #Saves the current value
88 #Saves the current sub-value
91 #Saves all subvalues for a property.
94 #Saves the char which opened the last string
98 #Status from which the parser switched to ic or instr
101 #Variable needed to manage string-in-strings, for example url("foo.png")
104 #=True if in invalid at-rule
107 #=True if something has been added to the current selector
110 #Saves the message log
113 #Saves the line number
# Default settings and helper wiring. The enclosing `def` line and the creation
# of self._settings are not visible in this listing; presumably this is the
# body of the constructor -- TODO confirm.
# These values are read elsewhere through getSetting(), e.g. 'preserve_css',
# 'lowercase_s', 'merge_selectors', 'discard_invalid_properties', 'css_level'.
117 self._settings['remove_bslash'] = True
118 self._settings['compress_colors'] = True
119 self._settings['compress_font-weight'] = True
120 self._settings['lowercase_s'] = False
121 self._settings['optimise_shorthands'] = 2
122 self._settings['remove_last_'] = False
123 self._settings['case_properties'] = 1
124 self._settings['sort_properties'] = False
125 self._settings['sort_selectors'] = False
126 self._settings['merge_selectors'] = 2
127 self._settings['discard_invalid_properties'] = False
128 self._settings['css_level'] = 'CSS2.1'
129 self._settings['preserve_css'] = False
130 self._settings['timestamp'] = False
131 self._settings['template'] = 'highest_compression'
133 #Maps self._status to methods
# Dispatch table used by parse(): the current status string selects the bound
# handler that processes the character at the current index.
134 self.__statusMethod = {'is':self.__parseStatus_is, 'ip': self.__parseStatus_ip, 'iv':self.__parseStatus_iv, 'instr':self.__parseStatus_instr, 'ic':self.__parseStatus_ic, 'at':self.__parseStatus_at}
# Both collaborators are constructed with this instance as their argument.
136 self._output = CSSPrinter(self)
137 self._optimizer = CSSOptimizer(self)
def getSetting(self, setting):
    """Return the stored value for *setting*.

    A setting name that has never been stored yields ``False`` rather than
    raising.
    """
    settings = self._settings
    return settings.get(setting, False)
def setSetting(self, setting, value):
    """Store *value* under the name *setting*, replacing any previous value."""
    settings = self._settings
    settings[setting] = value
# Records a message in self._log, grouped by line key.
#   message -- text of the entry (stored under key 'm')
#   ttype   -- entry type label (stored under key 't'); callers in this file
#              pass 'Warning'
#   line    -- key into self._log; defaults to -1
# Several original lines are missing from this listing (between the def and
# the dict construction, and inside the first branch -- presumably the list
# initialisation for a new key; TODO confirm).
# NOTE(review): dict.has_key() exists only on Python 2.
148 def log(self, message, ttype, line = -1):
154 add = {'m': message, 't': ttype}
# New key: start a list for this line; existing key: append unless duplicate.
156 if not self._log.has_key(line):
158 self._log[line].append(add)
159 elif add not in self._log[line]:
160 self._log[line].append(add)
def escaped(self, string, pos):
    """Return True if the character at ``string[pos]`` is backslash-escaped.

    A character is escaped when it is directly preceded by an odd number of
    consecutive backslashes; an even run consists only of escaped
    backslashes and leaves the character unescaped.

    string -- text being scanned
    pos    -- index of the character to test; a position at (or before) the
              start of the string is never escaped
    """
    backslashes = 0
    i = pos - 1
    # Stop at index 0: a negative index would wrap around to the end of the
    # string and inspect an unrelated character.
    while i >= 0 and string[i] == '\\':
        backslashes += 1
        i -= 1
    return backslashes % 2 == 1
def merge_css_blocks(self, media, selector, css_add):
    """Add every property in *css_add* to an existing media/selector block.

    media    -- at-rule key of the target block
    selector -- selector key of the target block
    css_add  -- mapping of property name to value to merge in
    """
    for prop, value in css_add.iteritems():
        # __css_add_property accepts exactly (media, selector, prop, new_val);
        # the extra fifth positional argument previously passed here made
        # every call raise TypeError.
        self.__css_add_property(media, selector, prop, value)
def is_important(self, value):
    """Tell whether *value* contains an ``!important`` marker in any letter case."""
    lowered = value.lower()
    return lowered.find('!important') != -1
176 #Returns a value without !important
# Only the guard is visible in this listing; the statements that strip the
# marker and return the result are missing here.
177 def gvw_important(self, value):
# is_important() matches the marker case-insensitively.
178 if self.is_important(value):
# Parses a CSS string: normalises line endings, walks the text one index at a
# time through the status handlers, then runs the optimizer over _raw_css.
#   cssString -- the stylesheet text to parse
# Some original lines (initialisation of the index/status, line counting, the
# return) are missing from this listing.
188 def parse(self, cssString):
189 #Switch from \r\n to \n
# A trailing space is appended so handlers can look one character ahead.
# (presumed purpose of the padding -- TODO confirm)
190 self._css = cssString.replace("\r\n", "\n") + ' '
192 self._optimized_css = {}
193 self._curComment = ''
# NOTE(review): the bound uses len(cssString) while the body indexes self._css,
# whose length differs (each "\r\n" shrinks by one, one space is appended);
# with two or more CRLF pairs the index can run past the end of self._css.
197 while i < len(cssString):
198 if self._css[i] == "\n" or self._css[i] == "\r":
# Dispatch on the current status; the handler's return value is the number of
# positions to advance.
201 i += self.__statusMethod[self._status](i)
205 self._optimized_css = self._optimizer.optimize(self._raw_css)
# Opens *filename* for reading; the rest of the method (reading the content,
# closing the handle, invoking the parser) is missing from this listing.
# NOTE(review): no `with` block or close() is visible -- confirm the file
# handle is released.
207 def parseFile(self, filename):
209 f = open(filename, "r")
# Handler registered under status 'is': processes the character at
# self._css[idx] while a selector is being accumulated in self._selector.
# parse() adds this handler's return value to its scan index. Many original
# lines (status switches, returns, else-branches) are missing from this
# listing, so only the visible branches are annotated.
215 def __parseStatus_is(self, idx):
221 if self.__is_token(self._css, idx):
# "/*" with no selector text collected yet.
222 if self._css[idx] == '/' and self._css[idx+1] == '*' and self._selector.strip() == '':
# '@' with no selector text collected yet: try to recognise a known at-rule;
# the rule is treated as invalid until a name from data.at_rules matches.
227 elif self._css[idx] == '@' and self._selector.strip() == '':
229 self._invalid_at = True
231 for name, ttype in data.at_rules.iteritems():
# NOTE(review): the slice end is len(name), not idx+1+len(name); for idx > 0
# the slice is shorter than the name (or empty) -- looks like a substr()
# start/length call translated literally. Confirm.
232 if self._css[idx+1:len(name)].lower() == name.lower():
234 self._at = '@' + name
236 self._selector = '@' + name
239 self._invalid_at = False
# Collect the alphabetic characters following '@' to name the rule in the
# warning below.
245 for j in xrange(idx+1, len(self._css)):
246 if not self._css[j].isalpha():
249 invalid_at_name += self._css[j]
251 self.log('Invalid @-rule: ' + invalid_at_name + ' (removed)', 'Warning')
# Quote character: switch to string status and remember which character must
# close the string.
253 elif self._css[idx] == '"' or self._css[idx] == "'":
254 self._cur_string = self._css[idx]
255 self._status = 'instr'
256 self._str_char = self._css[idx]
# ';' clears the invalid-at-rule flag.
259 elif self._invalid_at and self._css[idx] == ';':
260 self._invalid_at = False
263 elif self._css[idx] == '{':
265 self.__add_token(data.SEL_START, self._selector)
268 elif self._css[idx] == '}':
269 self.__add_token(data.AT_END, self._at)
272 self._sel_separate = []
# ',' separates selectors: record the offset just past the comma so that
# __explode_selectors() can split the group later.
274 elif self._css[idx] == ',':
275 self._selector = self._selector.strip() + ','
276 self._sel_separate.append(len(self._selector))
278 elif self._css[idx] == '\\':
279 self._selector += self.__unicode(idx)
281 #remove unnecessary universal selector, FS#147
282 elif not (self._css[idx] == '*' and self._css[idx+1] in ('.', '#', '[', ':')):
283 self._selector += self._css[idx]
286 lastpos = len(self._selector)-1
# Append the character unless it is whitespace following whitespace or an
# unescaped ',' -- i.e. collapse whitespace runs and drop spaces after commas.
288 if lastpos == -1 or not ((self._selector[lastpos].isspace() or self.__is_token(self._selector, lastpos) and self._selector[lastpos] == ',') and self._css[idx].isspace()):
289 self._selector += self._css[idx]
# Handler registered under status 'ip': processes the character at
# self._css[idx] while a property name is being accumulated in self._property.
# Status switches, resets and returns are among the lines missing from this
# listing.
293 def __parseStatus_ip(self, idx):
297 if self.__is_token(self._css, idx):
# ':' (or '=') after a non-empty name ends the property name.
298 if (self._css[idx] == ':' or self._css[idx] == '=') and self._property != '':
# Emit the PROPERTY token unless invalid properties are being discarded and
# this one is not valid for the configured CSS level.
301 if not self.getSetting('discard_invalid_properties') or self.__property_is_valid(self._property):
302 self.__add_token(data.PROPERTY, self._property)
# "/*" before any property text.
304 elif self._css[idx] == '/' and self._css[idx+1] == '*' and self._property == '':
# '}' closes the rule: split grouped selectors, clear the invalid-at flag and
# emit the SEL_END token.
309 elif self._css[idx] == '}':
310 self.__explode_selectors()
312 self._invalid_at = False
313 self.__add_token(data.SEL_END, self._selector)
317 elif self._css[idx] == ';':
320 elif self._css[idx] == '\\':
321 self._property += self.__unicode(idx)
# Non-token characters: whitespace is dropped, everything else extends the name.
323 elif not self._css[idx].isspace():
324 self._property += self._css[idx]
# Handler registered under status 'iv': processes the character at
# self._css[idx] while a value is being read. Pieces accumulate in
# self._sub_value, completed pieces in self._sub_value_arr, and the joined
# result in self._value. Many original lines (status switches, else-branches,
# resets, returns) are missing from this listing.
328 def __parseStatus_iv(self, idx):
# pn: true when a newline is followed by text that __property_is_next()
# recognises as a property (a declaration missing its ';'), or when idx equals
# the length of the input.
332 pn = (( self._css[idx] == "\n" or self._css[idx] == "\r") and self.__property_is_next(idx+1) or idx == len(self._css)) #CHECK#
333 if self.__is_token(self._css, idx) or pn:
334 if self._css[idx] == '/' and self._css[idx+1] == '*':
# Quote or '(' opens a string; a '(' string is closed by ')'.
339 elif self._css[idx] == '"' or self._css[idx] == "'" or self._css[idx] == '(':
340 self._cur_string = self._css[idx]
341 self._str_char = ')' if self._css[idx] == '(' else self._css[idx]
342 self._status = 'instr'
345 elif self._css[idx] == ',':
346 self._sub_value = self._sub_value.strip() + ','
348 elif self._css[idx] == '\\':
349 self._sub_value += self.__unicode(idx)
# End of declaration (explicit ';' or inferred via pn).
351 elif self._css[idx] == ';' or pn:
# At-rules whose data.at_rules entry is 'iv' carry their payload as a value:
# store it in the matching attribute instead of as a property.
352 if len(self._selector) > 0 and self._selector[0] == '@' and data.at_rules.has_key(self._selector[1:]) and data.at_rules[self._selector[1:]] == 'iv':
353 self._sub_value_arr.append(self._sub_value.strip())
357 if '@charset' in self._selector:
358 self._charset = self._sub_value_arr[0]
360 elif '@namespace' in self._selector:
361 self._namespace = ' '.join(self._sub_value_arr)
363 elif '@import' in self._selector:
364 self._import.append(' '.join(self._sub_value_arr))
367 self._sub_value_arr = []
370 self._sel_separate = []
375 elif self._css[idx] != '}':
376 self._sub_value += self._css[idx]
# Finalise the declaration when it ends inside a non-empty selector.
378 if (self._css[idx] == '}' or self._css[idx] == ';' or pn) and self._selector != '':
380 self._at = data.DEFAULT_AT
383 if self.getSetting('lowercase_s'):
384 self._selector = self._selector.lower()
386 self._property = self._property.lower()
388 if self._sub_value != '':
389 self._sub_value_arr.append(self._sub_value)
392 self._value = ' '.join(self._sub_value_arr)
395 self._selector = self._selector.strip()
397 valid = self.__property_is_valid(self._property)
# Store the declaration unless it sits in an invalid at-rule (and preserve_css
# is off) or it is invalid and invalid properties are being discarded.
399 if (not self._invalid_at or self.getSetting('preserve_css')) and (not self.getSetting('discard_invalid_properties') or valid):
400 self.__css_add_property(self._at, self._selector, self._property, self._value)
401 self.__add_token(data.VALUE, self._value)
# The conditions guarding the two warnings below are partly missing from this
# listing (presumably `if not valid:` / `else:` -- TODO confirm).
404 if self.getSetting('discard_invalid_properties'):
405 self.log('Removed invalid property: ' + self._property, 'Warning')
408 self.log('Invalid property in ' + self.getSetting('css_level').upper() + ': ' + self._property, 'Warning')
411 self._sub_value_arr = []
# '}' also closes the rule.
414 if self._css[idx] == '}':
415 self.__explode_selectors()
416 self.__add_token(data.SEL_END, self._selector)
418 self._invalid_at = False
# Presumably the non-token branch (its `else`/`elif` line is not visible):
# extend the current piece, and on whitespace push it onto the list.
422 self._sub_value += self._css[idx]
424 if self._css[idx].isspace():
425 if self._sub_value != '':
426 self._sub_value_arr.append(self._sub_value)
# Handler registered under status 'instr': processes the character at
# self._css[idx] while inside a string that was opened by ", ' or '('.
# The string text accumulates in self._cur_string; when the closing character
# is reached the status returns to self._from and the string is appended to
# the value or selector being built. Some original lines are missing from
# this listing.
431 def __parseStatus_instr(self, idx):
# Inside "(...)" an unescaped quote toggles the nested-string flag, so a ')'
# inside the quotes does not end the outer string.
435 if self._str_char == ')' and (self._css[idx] == '"' or self._css[idx] == "'") and not self.escaped(self._css, idx):
436 self._str_in_str = not self._str_in_str
438 temp_add = self._css[idx] # ...and no not-escaped backslash at the previous position
# A raw newline inside a string that is not preceded by an unescaped backslash
# is logged as fixed (the replacement assignment is not visible here).
439 if (self._css[idx] == "\n" or self._css[idx] == "\r") and not (self._css[idx-1] == '\\' and not self.escaped(self._css, idx-1)):
441 self.log('Fixed incorrect newline in string', 'Warning')
# Whitespace inside "(...)" but outside nested quotes is dropped.
443 if not (self._str_char == ')' and self._css[idx].isspace() and not self._str_in_str):
444 self._cur_string += temp_add
# Unescaped closing character outside a nested string ends the string.
446 if self._css[idx] == self._str_char and not self.escaped(self._css, idx) and not self._str_in_str:
447 self._status = self._from
# NOTE(review): match() only tests the beginning of the string, and the
# handlers that start a string seed _cur_string with the opening character,
# so this looks like it can never match; search() may have been intended
# ("contains whitespace") -- confirm.
448 regex = re.compile(r'([\s]+)', re.I | re.U | re.S)
449 if regex.match(self._cur_string) is None and self._property != 'content':
# Strip the surrounding quotes, or the quotes nested inside "( ... )".
450 if self._str_char == '"' or self._str_char == "'":
451 self._cur_string = self._cur_string[1:-1]
453 elif len(self._cur_string) > 3 and (self._cur_string[1] == '"' or self._cur_string[1] == "'"):
454 self._cur_string = self._cur_string[0] + self._cur_string[2:-2] + self._cur_string[-1]
# Hand the finished string back to whichever context opened it.
456 if self._from == 'iv':
457 self._sub_value += self._cur_string
459 elif self._from == 'is':
460 self._selector += self._cur_string
# Handler registered under status 'ic': processes the character at
# self._css[idx] while inside a comment. Comment text accumulates in
# self._curComment until "*/" is found. The else line and the returns are
# missing from this listing.
464 def __parseStatus_ic(self, idx):
# "*/" ends the comment: restore the previous status, emit the COMMENT token
# and reset the buffer.
468 if self._css[idx] == '*' and self._css[idx+1] == '/':
469 self._status = self._from
470 self.__add_token(data.COMMENT, self._curComment)
471 self._curComment = ''
# Otherwise (branch line not visible) the character extends the comment text.
475 self._curComment += self._css[idx]
# Handler registered under status 'at': processes the current character while
# an at-rule prelude is being accumulated in self._at. Several original lines
# are missing from this listing.
# NOTE(review): the parameter is named `idx`, but the body reads `string` and
# `i`, which no visible line defines -- unless the missing lines bind them,
# these raise NameError. Confirm against the full source.
479 def __parseStatus_at(self, idx):
483 if self.__is_token(string, idx):
484 if self._css[idx] == '/' and self._css[idx+1] == '*':
489 elif self._css[i] == '{':
491 self.__add_token(data.AT_START, self._at)
493 elif self._css[i] == ',':
494 self._at = self._at.strip() + ','
496 elif self._css[i] == '\\':
497 self._at += self.__unicode(i)
499 lastpos = len(self._at)-1
# NOTE(review): as parenthesised, the character is appended only when it IS
# whitespace and the previous character is neither whitespace nor ','. The
# selector handler keeps the whitespace test inside the negation, which
# appends everything except redundant whitespace -- confirm which is intended.
# There is also no guard for an empty self._at (lastpos == -1) here.
500 if not (self._at[lastpos].isspace() or self.__is_token(self._at, lastpos) and self._at[lastpos] == ',') and self._css[i].isspace():
501 self._at += self._css[i]
# Splits a comma-separated selector group into individual selectors and copies
# the group's declarations to each of them. Active only when the
# 'merge_selectors' setting equals 1. Uses the offsets collected in
# self._sel_separate. Several original lines (initialisation of new_sels and
# lastpos, branch bodies) are missing from this listing.
505 def __explode_selectors(self):
506 #Explode multiple selectors
507 if self.getSetting('merge_selectors') == 1:
# Add the end of the selector string as the final split offset.
510 self._sel_separate.append(len(self._selector))
512 for num in xrange(len(self._sel_separate)):
513 pos = self._sel_separate[num]
# NOTE(review): num ranges over 0..len-1, so this comparison with len can
# never be true (already marked #CHECK# by the author).
514 if num == (len(self._sel_separate)): #CHECK#
# NOTE(review): the slice end is computed like a length (pos-lastpos-1), not
# an end index -- looks like a substr(start, length) call translated
# literally. Confirm.
517 new_sels.append(self._selector[lastpos:(pos-lastpos-1)])
# More than one selector: copy the declarations to each selector, then remove
# the combined entry.
520 if len(new_sels) > 1:
521 for selector in new_sels:
522 self.merge_css_blocks(self._at, selector, self._raw_css[self._at][self._selector])
524 del self._raw_css[self._at][self._selector]
526 self._sel_separate = []
528 #Adds a property with value to the existing CSS code
#   media    -- first-level key into self._raw_css
#   selector -- second-level key
#   prop     -- property name
#   new_val  -- value text; stored stripped of surrounding whitespace
# Some original lines (the body of the first guard, else-branches) are missing
# from this listing.
529 def __css_add_property(self, media, selector, prop, new_val):
# Guard for preserve_css mode or an empty value; the body is not visible here
# (presumably an early return -- TODO confirm).
530 if self.getSetting('preserve_css') or new_val.strip() == '':
# Create the nested containers on first use.
533 if not self._raw_css.has_key(media):
534 self._raw_css[media] = SortedDict()
536 if not self._raw_css[media].has_key(selector):
537 self._raw_css[media][selector] = SortedDict()
# An existing value is replaced unless it is !important and the new one is not.
540 if self._raw_css[media][selector].has_key(prop):
541 if (self.is_important(self._raw_css[media][selector][prop]) and self.is_important(new_val)) or not self.is_important(self._raw_css[media][selector][prop]):
# Delete before re-adding (presumably so the property moves to the end of the
# ordered mapping -- depends on SortedDict semantics; TODO confirm).
542 del self._raw_css[media][selector][prop]
543 self._raw_css[media][selector][prop] = new_val.strip()
546 self._raw_css[media][selector][prop] = new_val.strip()
548 #Checks if the next word in a string from pos is a CSS property
#   pos -- index into self._css where the look-ahead starts
# The return statements and the handling of a missing ':' are among the lines
# not visible in this listing.
549 def __property_is_next(self, pos):
550 istring = self._css[pos: len(self._css)]
# `pos` is reused: from here on it is the offset of the first ':' within the
# look-ahead text (-1 if there is none).
551 pos = istring.find(':')
# The text before the ':' is the candidate property name.
555 istring = istring[:pos].strip().lower()
# A known property name means the previous declaration lacked its ';'.
556 if data.all_properties.has_key(istring):
557 self.log('Added semicolon to the end of declaration', 'Warning')
def __property_is_valid(self, prop):
    """Return True if *prop* is a known property for the configured CSS level.

    A property is valid when it is a key of ``data.all_properties`` and the
    associated string contains the upper-cased 'css_level' setting.

    prop -- property name to look up
    """
    known = data.all_properties
    # `in` replaces dict.has_key(), which does not exist on Python 3.
    if prop not in known:
        return False
    level = self.getSetting('css_level').upper()
    return known[prop].find(level) != -1
566 #Adds a token to self._tokens
#   ttype   -- token type constant (callers pass data.* constants)
#   cssdata -- token text
#   do      -- force recording even when 'preserve_css' is off
# One branch line between the two assignments is missing from this listing
# (presumably `else:` -- TODO confirm).
567 def __add_token(self, ttype, cssdata, do=False):
# Tokens are recorded only in preserve_css mode or when forced.
568 if self.getSetting('preserve_css') or do:
# Comment text is stored verbatim; other token text is stripped.
569 if ttype == data.COMMENT:
570 token = [ttype, cssdata]
572 token = [ttype, cssdata.strip()]
574 self._tokens.append(token)
576 #Parse unicode notations and find a replacement character
#   idx -- index into self._css of the backslash that starts the escape
#          (callers invoke this when self._css[idx] is a backslash)
# The body is not visible in this listing; callers concatenate the return
# value onto the selector, property or value text being built.
577 def __unicode(self, idx):
581 #Starts parsing from URL
#   url -- location of the stylesheet
# The lines that read the response and bind `data` are missing from this
# listing.
583 def __parse_from_url(self, url):
# NOTE(review): the second test is redundant -- any string containing "https"
# also contains "http". This is a substring test, not a scheme check.
585 if "http" in url.lower() or "https" in url.lower():
# NOTE(review): urllib.urlopen exists only on Python 2.
586 f = urllib.urlopen(url)
# NOTE(review): `data` here is presumably a local holding the downloaded text;
# the same name is used elsewhere in the file for the data module -- confirm
# it does not shadow it unintentionally.
591 return self.parse(data)
def __is_token(self, string, idx):
    """Tell whether ``string[idx]`` is an unescaped token character.

    string -- text being scanned
    idx    -- position to test

    A character counts as a token when it appears in ``data.tokens`` and is
    not escaped according to ``escaped()``.
    """
    if string[idx] not in data.tokens:
        return False
    return not self.escaped(string, idx)
def _getOutput(self):
    """Prepare the printer with the optimized CSS and return its ``render`` attribute."""
    printer = self._output
    printer.prepare(self._optimized_css)
    return printer.render
# Builds a text rendering of self._log. The enclosing `def` line, the loop
# over the keys and the return are missing from this listing; presumably this
# is the body of _getLog, which is bound to the Log property -- TODO confirm.
607 ks = self._log.keys()
# Each entry is a dict with keys 't' (type) and 'm' (message), as stored by log().
610 for msg in self._log[line]:
611 ret += "Type: " + msg['t'] + "\n"
612 ret += "Message: " + msg['m'] + "\n"
# Read-only properties (no setter is supplied).
# Output evaluates _getOutput(), which prepares the printer and returns its
# `render` attribute; _getCSS is not visible in this listing.
622 Output = property(_getOutput, None)
623 Log = property(_getLog, None)
624 CSS = property(_getCSS, None)
# Script entry point: opens the file named by the first command-line argument.
# The lines that read it, create `tidy` and parse the content are missing from
# this listing, as is any visible `import sys`.
627 if __name__ == '__main__':
# NOTE(review): no close()/`with` is visible for this handle -- confirm.
630 f = open(sys.argv[1], "r")
# Output is a property returning the printer's `render` attribute, so this
# line calls that attribute with a target kind and an output filename.
634 tidy.Output('file', filename="Stylesheet.min.css")