X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/197fc570be54152c90c424379a00b36e28e0cf79..61c3753c6e0c567ccbb11115c787da22218ab5f6:/apps/compress/filters/csstidy_python/csstidy.py diff --git a/apps/compress/filters/csstidy_python/csstidy.py b/apps/compress/filters/csstidy_python/csstidy.py new file mode 100644 index 00000000..6ae8dc73 --- /dev/null +++ b/apps/compress/filters/csstidy_python/csstidy.py @@ -0,0 +1,636 @@ +# CSSTidy - CSS Parse +# +# CSS Parser class +# +# This file is part of CSSTidy. +# +# CSSTidy is free software you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation either version 2 of the License, or +# (at your option) any later version. +# +# CSSTidy is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CSSTidy if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +# @license http://opensource.org/licenses/gpl-license.php GNU Public License +# @package csstidy +# @author Dj Gilcrease (digitalxero at gmail dot com) 2005-2006 + +import re + +from optimizer import CSSOptimizer +from output import CSSPrinter +import data +from tools import SortedDict + +class CSSTidy(object): + #Saves the parsed CSS + _css = "" + _raw_css = SortedDict() + _optimized_css = SortedDict() + + #List of Tokens + _tokens = [] + + #Printer class + _output = None + + #Optimiser class + _optimizer = None + + #Saves the CSS charset (@charset) + _charset = '' + + #Saves all @import URLs + _import = [] + + #Saves the namespace + _namespace = '' + + #Contains the version of csstidy + _version = '1.3' + + #Stores the settings + _settings = {} + + # Saves the parser-status. + # + # Possible values: + # - is = in selector + # - ip = in property + # - iv = in value + # - instr = in string (started at " or ' or ( ) + # - ic = in comment (ignore everything) + # - at = in @-block + _status = 'is' + + #Saves the current at rule (@media) + _at = '' + + #Saves the current selector + _selector = '' + + #Saves the current property + _property = '' + + #Saves the position of , in selectors + _sel_separate = [] + + #Saves the current value + _value = '' + + #Saves the current sub-value + _sub_value = '' + + #Saves all subvalues for a property. + _sub_value_arr = [] + + #Saves the char which opened the last string + _str_char = '' + _cur_string = '' + + #Status from which the parser switched to ic or instr + _from = '' + + #Variable needed to manage string-in-strings, for example url("foo.png") + _str_in_str = False + + #=True if in invalid at-rule + _invalid_at = False + + #=True if something has been added to the current selector + _added = False + + #Saves the message log + _log = SortedDict() + + #Saves the line number + _line = 1 + + def __init__(self): + self._settings['remove_bslash'] = True + self._settings['compress_colors'] = True + self._settings['compress_font-weight'] = True + self._settings['lowercase_s'] = False + self._settings['optimise_shorthands'] = 2 + self._settings['remove_last_'] = False + self._settings['case_properties'] = 1 + self._settings['sort_properties'] = False + self._settings['sort_selectors'] = False + self._settings['merge_selectors'] = 2 + self._settings['discard_invalid_properties'] = False + self._settings['css_level'] = 'CSS2.1' + self._settings['preserve_css'] = False + self._settings['timestamp'] = False + self._settings['template'] = 'highest_compression' + + #Maps self._status to methods + self.__statusMethod = {'is':self.__parseStatus_is, 'ip': self.__parseStatus_ip, 'iv':self.__parseStatus_iv, 'instr':self.__parseStatus_instr, 'ic':self.__parseStatus_ic, 'at':self.__parseStatus_at} + + self._output = CSSPrinter(self) + self._optimizer = CSSOptimizer(self) + + #Public Methods + def getSetting(self, setting): + return self._settings.get(setting, False) + + #Set the value of a setting. + def setSetting(self, setting, value): + self._settings[setting] = value + return True + + def log(self, message, ttype, line = -1): + if line == -1: + line = self._line + + line = int(line) + + add = {'m': message, 't': ttype} + + if not self._log.has_key(line): + self._log[line] = [] + self._log[line].append(add) + elif add not in self._log[line]: + self._log[line].append(add) + + + #Checks if a character is escaped (and returns True if it is) + def escaped(self, string, pos): + return not (string[pos-1] != '\\' or self.escaped(string, pos-1)) + + #Adds CSS to an existing media/selector + def merge_css_blocks(self, media, selector, css_add): + for prop, value in css_add.iteritems(): + self.__css_add_property(media, selector, prop, value, False) + + #Checks if $value is !important. + def is_important(self, value): + return '!important' in value.lower() + + #Returns a value without !important + def gvw_important(self, value): + if self.is_important(value): + ret = value.strip() + ret = ret[0:-9] + ret = ret.strip() + ret = ret[0:-1] + ret = ret.strip() + return ret + + return value + + def parse(self, cssString): + #Switch from \r\n to \n + self._css = cssString.replace("\r\n", "\n") + ' ' + self._raw_css = {} + self._optimized_css = {} + self._curComment = '' + + #Start Parsing + i = 0 + while i < len(cssString): + if self._css[i] == "\n" or self._css[i] == "\r": + self._line += 1 + + i += self.__statusMethod[self._status](i) + + i += 1; + + self._optimized_css = self._optimizer.optimize(self._raw_css) + + def parseFile(self, filename): + try: + f = open(filename, "r") + self.parse(f.read()) + finally: + f.close() + + #Private Methods + def __parseStatus_is(self, idx): + """ + Parse in Selector + """ + ret = 0 + + if self.__is_token(self._css, idx): + if self._css[idx] == '/' and self._css[idx+1] == '*' and self._selector.strip() == '': + self._status = 'ic' + self._from = 'is' + return 1 + + elif self._css[idx] == '@' and self._selector.strip() == '': + #Check for at-rule + self._invalid_at = True + + for name, ttype in data.at_rules.iteritems(): + if self._css[idx+1:len(name)].lower() == name.lower(): + if ttype == 'at': + self._at = '@' + name + else: + self._selector = '@' + name + + self._status = ttype + self._invalid_at = False + ret += len(name) + + if self._invalid_at: + self._selector = '@' + invalid_at_name = '' + for j in xrange(idx+1, len(self._css)): + if not self._css[j].isalpha(): + break; + + invalid_at_name += self._css[j] + + self.log('Invalid @-rule: ' + invalid_at_name + ' (removed)', 'Warning') + + elif self._css[idx] == '"' or self._css[idx] == "'": + self._cur_string = self._css[idx] + self._status = 'instr' + self._str_char = self._css[idx] + self._from = 'is' + + elif self._invalid_at and self._css[idx] == ';': + self._invalid_at = False + self._status = 'is' + + elif self._css[idx] == '{': + self._status = 'ip' + self.__add_token(data.SEL_START, self._selector) + self._added = False; + + elif self._css[idx] == '}': + self.__add_token(data.AT_END, self._at) + self._at = '' + self._selector = '' + self._sel_separate = [] + + elif self._css[idx] == ',': + self._selector = self._selector.strip() + ',' + self._sel_separate.append(len(self._selector)) + + elif self._css[idx] == '\\': + self._selector += self.__unicode(idx) + + #remove unnecessary universal selector, FS#147 + elif not (self._css[idx] == '*' and self._css[idx+1] in ('.', '#', '[', ':')): + self._selector += self._css[idx] + + else: + lastpos = len(self._selector)-1 + + if lastpos == -1 or not ((self._selector[lastpos].isspace() or self.__is_token(self._selector, lastpos) and self._selector[lastpos] == ',') and self._css[idx].isspace()): + self._selector += self._css[idx] + + return ret + + def __parseStatus_ip(self, idx): + """ + Parse in property + """ + if self.__is_token(self._css, idx): + if (self._css[idx] == ':' or self._css[idx] == '=') and self._property != '': + self._status = 'iv' + + if not self.getSetting('discard_invalid_properties') or self.__property_is_valid(self._property): + self.__add_token(data.PROPERTY, self._property) + + elif self._css[idx] == '/' and self._css[idx+1] == '*' and self._property == '': + self._status = 'ic' + self._from = 'ip' + return 1 + + elif self._css[idx] == '}': + self.__explode_selectors() + self._status = 'is' + self._invalid_at = False + self.__add_token(data.SEL_END, self._selector) + self._selector = '' + self._property = '' + + elif self._css[idx] == ';': + self._property = '' + + elif self._css[idx] == '\\': + self._property += self.__unicode(idx) + + elif not self._css[idx].isspace(): + self._property += self._css[idx] + + return 0 + + def __parseStatus_iv(self, idx): + """ + Parse in value + """ + pn = (( self._css[idx] == "\n" or self._css[idx] == "\r") and self.__property_is_next(idx+1) or idx == len(self._css)) #CHECK# + if self.__is_token(self._css, idx) or pn: + if self._css[idx] == '/' and self._css[idx+1] == '*': + self._status = 'ic' + self._from = 'iv' + return 1 + + elif self._css[idx] == '"' or self._css[idx] == "'" or self._css[idx] == '(': + self._cur_string = self._css[idx] + self._str_char = ')' if self._css[idx] == '(' else self._css[idx] + self._status = 'instr' + self._from = 'iv' + + elif self._css[idx] == ',': + self._sub_value = self._sub_value.strip() + ',' + + elif self._css[idx] == '\\': + self._sub_value += self.__unicode(idx) + + elif self._css[idx] == ';' or pn: + if len(self._selector) > 0 and self._selector[0] == '@' and data.at_rules.has_key(self._selector[1:]) and data.at_rules[self._selector[1:]] == 'iv': + self._sub_value_arr.append(self._sub_value.strip()) + + self._status = 'is' + + if '@charset' in self._selector: + self._charset = self._sub_value_arr[0] + + elif '@namespace' in self._selector: + self._namespace = ' '.join(self._sub_value_arr) + + elif '@import' in self._selector: + self._import.append(' '.join(self._sub_value_arr)) + + + self._sub_value_arr = [] + self._sub_value = '' + self._selector = '' + self._sel_separate = [] + + else: + self._status = 'ip' + + elif self._css[idx] != '}': + self._sub_value += self._css[idx] + + if (self._css[idx] == '}' or self._css[idx] == ';' or pn) and self._selector != '': + if self._at == '': + self._at = data.DEFAULT_AT + + #case settings + if self.getSetting('lowercase_s'): + self._selector = self._selector.lower() + + self._property = self._property.lower() + + if self._sub_value != '': + self._sub_value_arr.append(self._sub_value) + self._sub_value = '' + + self._value = ' '.join(self._sub_value_arr) + + + self._selector = self._selector.strip() + + valid = self.__property_is_valid(self._property) + + if (not self._invalid_at or self.getSetting('preserve_css')) and (not self.getSetting('discard_invalid_properties') or valid): + self.__css_add_property(self._at, self._selector, self._property, self._value) + self.__add_token(data.VALUE, self._value) + + if not valid: + if self.getSetting('discard_invalid_properties'): + self.log('Removed invalid property: ' + self._property, 'Warning') + + else: + self.log('Invalid property in ' + self.getSetting('css_level').upper() + ': ' + self._property, 'Warning') + + self._property = ''; + self._sub_value_arr = [] + self._value = '' + + if self._css[idx] == '}': + self.__explode_selectors() + self.__add_token(data.SEL_END, self._selector) + self._status = 'is' + self._invalid_at = False + self._selector = '' + + elif not pn: + self._sub_value += self._css[idx] + + if self._css[idx].isspace(): + if self._sub_value != '': + self._sub_value_arr.append(self._sub_value) + self._sub_value = '' + + return 0 + + def __parseStatus_instr(self, idx): + """ + Parse in String + """ + if self._str_char == ')' and (self._css[idx] == '"' or self._css[idx] == "'") and not self.escaped(self._css, idx): + self._str_in_str = not self._str_in_str + + temp_add = self._css[idx] # ...and no not-escaped backslash at the previous position + if (self._css[idx] == "\n" or self._css[idx] == "\r") and not (self._css[idx-1] == '\\' and not self.escaped(self._css, idx-1)): + temp_add = "\\A " + self.log('Fixed incorrect newline in string', 'Warning') + + if not (self._str_char == ')' and self._css[idx].isspace() and not self._str_in_str): + self._cur_string += temp_add + + if self._css[idx] == self._str_char and not self.escaped(self._css, idx) and not self._str_in_str: + self._status = self._from + regex = re.compile(r'([\s]+)', re.I | re.U | re.S) + if regex.match(self._cur_string) is None and self._property != 'content': + if self._str_char == '"' or self._str_char == "'": + self._cur_string = self._cur_string[1:-1] + + elif len(self._cur_string) > 3 and (self._cur_string[1] == '"' or self._cur_string[1] == "'"): + self._cur_string = self._cur_string[0] + self._cur_string[2:-2] + self._cur_string[-1] + + if self._from == 'iv': + self._sub_value += self._cur_string + + elif self._from == 'is': + self._selector += self._cur_string + + return 0 + + def __parseStatus_ic(self, idx): + """ + Parse css In Comment + """ + if self._css[idx] == '*' and self._css[idx+1] == '/': + self._status = self._from + self.__add_token(data.COMMENT, self._curComment) + self._curComment = '' + return 1 + + else: + self._curComment += self._css[idx] + + return 0 + + def __parseStatus_at(self, idx): + """ + Parse in at-block + """ + if self.__is_token(string, idx): + if self._css[idx] == '/' and self._css[idx+1] == '*': + self._status = 'ic' + self._from = 'at' + return 1 + + elif self._css[i] == '{': + self._status = 'is' + self.__add_token(data.AT_START, self._at) + + elif self._css[i] == ',': + self._at = self._at.strip() + ',' + + elif self._css[i] == '\\': + self._at += self.__unicode(i) + else: + lastpos = len(self._at)-1 + if not (self._at[lastpos].isspace() or self.__is_token(self._at, lastpos) and self._at[lastpos] == ',') and self._css[i].isspace(): + self._at += self._css[i] + + return 0 + + def __explode_selectors(self): + #Explode multiple selectors + if self.getSetting('merge_selectors') == 1: + new_sels = [] + lastpos = 0; + self._sel_separate.append(len(self._selector)) + + for num in xrange(len(self._sel_separate)): + pos = self._sel_separate[num] + if num == (len(self._sel_separate)): #CHECK# + pos += 1 + + new_sels.append(self._selector[lastpos:(pos-lastpos-1)]) + lastpos = pos + + if len(new_sels) > 1: + for selector in new_sels: + self.merge_css_blocks(self._at, selector, self._raw_css[self._at][self._selector]) + + del self._raw_css[self._at][self._selector] + + self._sel_separate = [] + + #Adds a property with value to the existing CSS code + def __css_add_property(self, media, selector, prop, new_val): + if self.getSetting('preserve_css') or new_val.strip() == '': + return + + if not self._raw_css.has_key(media): + self._raw_css[media] = SortedDict() + + if not self._raw_css[media].has_key(selector): + self._raw_css[media][selector] = SortedDict() + + self._added = True + if self._raw_css[media][selector].has_key(prop): + if (self.is_important(self._raw_css[media][selector][prop]) and self.is_important(new_val)) or not self.is_important(self._raw_css[media][selector][prop]): + del self._raw_css[media][selector][prop] + self._raw_css[media][selector][prop] = new_val.strip() + + else: + self._raw_css[media][selector][prop] = new_val.strip() + + #Checks if the next word in a string from pos is a CSS property + def __property_is_next(self, pos): + istring = self._css[pos: len(self._css)] + pos = istring.find(':') + if pos == -1: + return False; + + istring = istring[:pos].strip().lower() + if data.all_properties.has_key(istring): + self.log('Added semicolon to the end of declaration', 'Warning') + return True + + return False; + + #Checks if a property is valid + def __property_is_valid(self, prop): + return (data.all_properties.has_key(prop) and data.all_properties[prop].find(self.getSetting('css_level').upper()) != -1) + + #Adds a token to self._tokens + def __add_token(self, ttype, cssdata, do=False): + if self.getSetting('preserve_css') or do: + if ttype == data.COMMENT: + token = [ttype, cssdata] + else: + token = [ttype, cssdata.strip()] + + self._tokens.append(token) + + #Parse unicode notations and find a replacement character + def __unicode(self, idx): + ##FIX## + return '' + + #Starts parsing from URL + ##USED? + def __parse_from_url(self, url): + try: + if "http" in url.lower() or "https" in url.lower(): + f = urllib.urlopen(url) + else: + f = open(url) + + data = f.read() + return self.parse(data) + finally: + f.close() + + #Checks if there is a token at the current position + def __is_token(self, string, idx): + return (string[idx] in data.tokens and not self.escaped(string, idx)) + + + #Property Methods + def _getOutput(self): + self._output.prepare(self._optimized_css) + return self._output.render + + def _getLog(self): + ret = "" + ks = self._log.keys() + ks.sort() + for line in ks: + for msg in self._log[line]: + ret += "Type: " + msg['t'] + "\n" + ret += "Message: " + msg['m'] + "\n" + ret += "\n" + + return ret + + def _getCSS(self): + return self._css + + + #Properties + Output = property(_getOutput, None) + Log = property(_getLog, None) + CSS = property(_getCSS, None) + + +if __name__ == '__main__': + import sys + tidy = CSSTidy() + f = open(sys.argv[1], "r") + css = f.read() + f.close() + tidy.parse(css) + tidy.Output('file', filename="Stylesheet.min.css") + print tidy.Output() + #print tidy._import \ No newline at end of file