+++ /dev/null
-# CSSTidy - CSS Parse
-#
-# CSS Parser class
-#
-# This file is part of CSSTidy.
-#
-# CSSTidy is free software you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation either version 2 of the License, or
-# (at your option) any later version.
-#
-# CSSTidy is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with CSSTidy if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-#
-# @license http://opensource.org/licenses/gpl-license.php GNU Public License
-# @package csstidy
-# @author Dj Gilcrease (digitalxero at gmail dot com) 2005-2006
-
-import re
-
-from optimizer import CSSOptimizer
-from output import CSSPrinter
-import data
-from tools import SortedDict
-
-class CSSTidy(object):
- #Saves the parsed CSS
- _css = ""
- _raw_css = SortedDict()
- _optimized_css = SortedDict()
-
- #List of Tokens
- _tokens = []
-
- #Printer class
- _output = None
-
- #Optimiser class
- _optimizer = None
-
- #Saves the CSS charset (@charset)
- _charset = ''
-
- #Saves all @import URLs
- _import = []
-
- #Saves the namespace
- _namespace = ''
-
- #Contains the version of csstidy
- _version = '1.3'
-
- #Stores the settings
- _settings = {}
-
- # Saves the parser-status.
- #
- # Possible values:
- # - is = in selector
- # - ip = in property
- # - iv = in value
- # - instr = in string (started at " or ' or ( )
- # - ic = in comment (ignore everything)
- # - at = in @-block
- _status = 'is'
-
- #Saves the current at rule (@media)
- _at = ''
-
- #Saves the current selector
- _selector = ''
-
- #Saves the current property
- _property = ''
-
- #Saves the position of , in selectors
- _sel_separate = []
-
- #Saves the current value
- _value = ''
-
- #Saves the current sub-value
- _sub_value = ''
-
- #Saves all subvalues for a property.
- _sub_value_arr = []
-
- #Saves the char which opened the last string
- _str_char = ''
- _cur_string = ''
-
- #Status from which the parser switched to ic or instr
- _from = ''
-
- #Variable needed to manage string-in-strings, for example url("foo.png")
- _str_in_str = False
-
- #=True if in invalid at-rule
- _invalid_at = False
-
- #=True if something has been added to the current selector
- _added = False
-
- #Saves the message log
- _log = SortedDict()
-
- #Saves the line number
- _line = 1
-
- def __init__(self):
- self._settings['remove_bslash'] = True
- self._settings['compress_colors'] = True
- self._settings['compress_font-weight'] = True
- self._settings['lowercase_s'] = False
- self._settings['optimise_shorthands'] = 2
- self._settings['remove_last_'] = False
- self._settings['case_properties'] = 1
- self._settings['sort_properties'] = False
- self._settings['sort_selectors'] = False
- self._settings['merge_selectors'] = 2
- self._settings['discard_invalid_properties'] = False
- self._settings['css_level'] = 'CSS2.1'
- self._settings['preserve_css'] = False
- self._settings['timestamp'] = False
- self._settings['template'] = 'highest_compression'
-
- #Maps self._status to methods
- self.__statusMethod = {'is':self.__parseStatus_is, 'ip': self.__parseStatus_ip, 'iv':self.__parseStatus_iv, 'instr':self.__parseStatus_instr, 'ic':self.__parseStatus_ic, 'at':self.__parseStatus_at}
-
- self._output = CSSPrinter(self)
- self._optimizer = CSSOptimizer(self)
-
- #Public Methods
- def getSetting(self, setting):
- return self._settings.get(setting, False)
-
- #Set the value of a setting.
- def setSetting(self, setting, value):
- self._settings[setting] = value
- return True
-
- def log(self, message, ttype, line = -1):
- if line == -1:
- line = self._line
-
- line = int(line)
-
- add = {'m': message, 't': ttype}
-
- if not self._log.has_key(line):
- self._log[line] = []
- self._log[line].append(add)
- elif add not in self._log[line]:
- self._log[line].append(add)
-
-
- #Checks if a character is escaped (and returns True if it is)
- def escaped(self, string, pos):
- return not (string[pos-1] != '\\' or self.escaped(string, pos-1))
-
- #Adds CSS to an existing media/selector
- def merge_css_blocks(self, media, selector, css_add):
- for prop, value in css_add.iteritems():
- self.__css_add_property(media, selector, prop, value, False)
-
- #Checks if $value is !important.
- def is_important(self, value):
- return '!important' in value.lower()
-
- #Returns a value without !important
- def gvw_important(self, value):
- if self.is_important(value):
- ret = value.strip()
- ret = ret[0:-9]
- ret = ret.strip()
- ret = ret[0:-1]
- ret = ret.strip()
- return ret
-
- return value
-
- def parse(self, cssString):
- #Switch from \r\n to \n
- self._css = cssString.replace("\r\n", "\n") + ' '
- self._raw_css = {}
- self._optimized_css = {}
- self._curComment = ''
-
- #Start Parsing
- i = 0
- while i < len(cssString):
- if self._css[i] == "\n" or self._css[i] == "\r":
- self._line += 1
-
- i += self.__statusMethod[self._status](i)
-
- i += 1;
-
- self._optimized_css = self._optimizer.optimize(self._raw_css)
-
- def parseFile(self, filename):
- try:
- f = open(filename, "r")
- self.parse(f.read())
- finally:
- f.close()
-
- #Private Methods
- def __parseStatus_is(self, idx):
- """
- Parse in Selector
- """
- ret = 0
-
- if self.__is_token(self._css, idx):
- if self._css[idx] == '/' and self._css[idx+1] == '*' and self._selector.strip() == '':
- self._status = 'ic'
- self._from = 'is'
- return 1
-
- elif self._css[idx] == '@' and self._selector.strip() == '':
- #Check for at-rule
- self._invalid_at = True
-
- for name, ttype in data.at_rules.iteritems():
- if self._css[idx+1:len(name)].lower() == name.lower():
- if ttype == 'at':
- self._at = '@' + name
- else:
- self._selector = '@' + name
-
- self._status = ttype
- self._invalid_at = False
- ret += len(name)
-
- if self._invalid_at:
- self._selector = '@'
- invalid_at_name = ''
- for j in xrange(idx+1, len(self._css)):
- if not self._css[j].isalpha():
- break;
-
- invalid_at_name += self._css[j]
-
- self.log('Invalid @-rule: ' + invalid_at_name + ' (removed)', 'Warning')
-
- elif self._css[idx] == '"' or self._css[idx] == "'":
- self._cur_string = self._css[idx]
- self._status = 'instr'
- self._str_char = self._css[idx]
- self._from = 'is'
-
- elif self._invalid_at and self._css[idx] == ';':
- self._invalid_at = False
- self._status = 'is'
-
- elif self._css[idx] == '{':
- self._status = 'ip'
- self.__add_token(data.SEL_START, self._selector)
- self._added = False;
-
- elif self._css[idx] == '}':
- self.__add_token(data.AT_END, self._at)
- self._at = ''
- self._selector = ''
- self._sel_separate = []
-
- elif self._css[idx] == ',':
- self._selector = self._selector.strip() + ','
- self._sel_separate.append(len(self._selector))
-
- elif self._css[idx] == '\\':
- self._selector += self.__unicode(idx)
-
- #remove unnecessary universal selector, FS#147
- elif not (self._css[idx] == '*' and self._css[idx+1] in ('.', '#', '[', ':')):
- self._selector += self._css[idx]
-
- else:
- lastpos = len(self._selector)-1
-
- if lastpos == -1 or not ((self._selector[lastpos].isspace() or self.__is_token(self._selector, lastpos) and self._selector[lastpos] == ',') and self._css[idx].isspace()):
- self._selector += self._css[idx]
-
- return ret
-
- def __parseStatus_ip(self, idx):
- """
- Parse in property
- """
- if self.__is_token(self._css, idx):
- if (self._css[idx] == ':' or self._css[idx] == '=') and self._property != '':
- self._status = 'iv'
-
- if not self.getSetting('discard_invalid_properties') or self.__property_is_valid(self._property):
- self.__add_token(data.PROPERTY, self._property)
-
- elif self._css[idx] == '/' and self._css[idx+1] == '*' and self._property == '':
- self._status = 'ic'
- self._from = 'ip'
- return 1
-
- elif self._css[idx] == '}':
- self.__explode_selectors()
- self._status = 'is'
- self._invalid_at = False
- self.__add_token(data.SEL_END, self._selector)
- self._selector = ''
- self._property = ''
-
- elif self._css[idx] == ';':
- self._property = ''
-
- elif self._css[idx] == '\\':
- self._property += self.__unicode(idx)
-
- elif not self._css[idx].isspace():
- self._property += self._css[idx]
-
- return 0
-
- def __parseStatus_iv(self, idx):
- """
- Parse in value
- """
- pn = (( self._css[idx] == "\n" or self._css[idx] == "\r") and self.__property_is_next(idx+1) or idx == len(self._css)) #CHECK#
- if self.__is_token(self._css, idx) or pn:
- if self._css[idx] == '/' and self._css[idx+1] == '*':
- self._status = 'ic'
- self._from = 'iv'
- return 1
-
- elif self._css[idx] == '"' or self._css[idx] == "'" or self._css[idx] == '(':
- self._cur_string = self._css[idx]
- self._str_char = ')' if self._css[idx] == '(' else self._css[idx]
- self._status = 'instr'
- self._from = 'iv'
-
- elif self._css[idx] == ',':
- self._sub_value = self._sub_value.strip() + ','
-
- elif self._css[idx] == '\\':
- self._sub_value += self.__unicode(idx)
-
- elif self._css[idx] == ';' or pn:
- if len(self._selector) > 0 and self._selector[0] == '@' and data.at_rules.has_key(self._selector[1:]) and data.at_rules[self._selector[1:]] == 'iv':
- self._sub_value_arr.append(self._sub_value.strip())
-
- self._status = 'is'
-
- if '@charset' in self._selector:
- self._charset = self._sub_value_arr[0]
-
- elif '@namespace' in self._selector:
- self._namespace = ' '.join(self._sub_value_arr)
-
- elif '@import' in self._selector:
- self._import.append(' '.join(self._sub_value_arr))
-
-
- self._sub_value_arr = []
- self._sub_value = ''
- self._selector = ''
- self._sel_separate = []
-
- else:
- self._status = 'ip'
-
- elif self._css[idx] != '}':
- self._sub_value += self._css[idx]
-
- if (self._css[idx] == '}' or self._css[idx] == ';' or pn) and self._selector != '':
- if self._at == '':
- self._at = data.DEFAULT_AT
-
- #case settings
- if self.getSetting('lowercase_s'):
- self._selector = self._selector.lower()
-
- self._property = self._property.lower()
-
- if self._sub_value != '':
- self._sub_value_arr.append(self._sub_value)
- self._sub_value = ''
-
- self._value = ' '.join(self._sub_value_arr)
-
-
- self._selector = self._selector.strip()
-
- valid = self.__property_is_valid(self._property)
-
- if (not self._invalid_at or self.getSetting('preserve_css')) and (not self.getSetting('discard_invalid_properties') or valid):
- self.__css_add_property(self._at, self._selector, self._property, self._value)
- self.__add_token(data.VALUE, self._value)
-
- if not valid:
- if self.getSetting('discard_invalid_properties'):
- self.log('Removed invalid property: ' + self._property, 'Warning')
-
- else:
- self.log('Invalid property in ' + self.getSetting('css_level').upper() + ': ' + self._property, 'Warning')
-
- self._property = '';
- self._sub_value_arr = []
- self._value = ''
-
- if self._css[idx] == '}':
- self.__explode_selectors()
- self.__add_token(data.SEL_END, self._selector)
- self._status = 'is'
- self._invalid_at = False
- self._selector = ''
-
- elif not pn:
- self._sub_value += self._css[idx]
-
- if self._css[idx].isspace():
- if self._sub_value != '':
- self._sub_value_arr.append(self._sub_value)
- self._sub_value = ''
-
- return 0
-
- def __parseStatus_instr(self, idx):
- """
- Parse in String
- """
- if self._str_char == ')' and (self._css[idx] == '"' or self._css[idx] == "'") and not self.escaped(self._css, idx):
- self._str_in_str = not self._str_in_str
-
- temp_add = self._css[idx] # ...and no not-escaped backslash at the previous position
- if (self._css[idx] == "\n" or self._css[idx] == "\r") and not (self._css[idx-1] == '\\' and not self.escaped(self._css, idx-1)):
- temp_add = "\\A "
- self.log('Fixed incorrect newline in string', 'Warning')
-
- if not (self._str_char == ')' and self._css[idx].isspace() and not self._str_in_str):
- self._cur_string += temp_add
-
- if self._css[idx] == self._str_char and not self.escaped(self._css, idx) and not self._str_in_str:
- self._status = self._from
- regex = re.compile(r'([\s]+)', re.I | re.U | re.S)
- if regex.match(self._cur_string) is None and self._property != 'content':
- if self._str_char == '"' or self._str_char == "'":
- self._cur_string = self._cur_string[1:-1]
-
- elif len(self._cur_string) > 3 and (self._cur_string[1] == '"' or self._cur_string[1] == "'"):
- self._cur_string = self._cur_string[0] + self._cur_string[2:-2] + self._cur_string[-1]
-
- if self._from == 'iv':
- self._sub_value += self._cur_string
-
- elif self._from == 'is':
- self._selector += self._cur_string
-
- return 0
-
- def __parseStatus_ic(self, idx):
- """
- Parse css In Comment
- """
- if self._css[idx] == '*' and self._css[idx+1] == '/':
- self._status = self._from
- self.__add_token(data.COMMENT, self._curComment)
- self._curComment = ''
- return 1
-
- else:
- self._curComment += self._css[idx]
-
- return 0
-
- def __parseStatus_at(self, idx):
- """
- Parse in at-block
- """
- if self.__is_token(string, idx):
- if self._css[idx] == '/' and self._css[idx+1] == '*':
- self._status = 'ic'
- self._from = 'at'
- return 1
-
- elif self._css[i] == '{':
- self._status = 'is'
- self.__add_token(data.AT_START, self._at)
-
- elif self._css[i] == ',':
- self._at = self._at.strip() + ','
-
- elif self._css[i] == '\\':
- self._at += self.__unicode(i)
- else:
- lastpos = len(self._at)-1
- if not (self._at[lastpos].isspace() or self.__is_token(self._at, lastpos) and self._at[lastpos] == ',') and self._css[i].isspace():
- self._at += self._css[i]
-
- return 0
-
- def __explode_selectors(self):
- #Explode multiple selectors
- if self.getSetting('merge_selectors') == 1:
- new_sels = []
- lastpos = 0;
- self._sel_separate.append(len(self._selector))
-
- for num in xrange(len(self._sel_separate)):
- pos = self._sel_separate[num]
- if num == (len(self._sel_separate)): #CHECK#
- pos += 1
-
- new_sels.append(self._selector[lastpos:(pos-lastpos-1)])
- lastpos = pos
-
- if len(new_sels) > 1:
- for selector in new_sels:
- self.merge_css_blocks(self._at, selector, self._raw_css[self._at][self._selector])
-
- del self._raw_css[self._at][self._selector]
-
- self._sel_separate = []
-
- #Adds a property with value to the existing CSS code
- def __css_add_property(self, media, selector, prop, new_val):
- if self.getSetting('preserve_css') or new_val.strip() == '':
- return
-
- if not self._raw_css.has_key(media):
- self._raw_css[media] = SortedDict()
-
- if not self._raw_css[media].has_key(selector):
- self._raw_css[media][selector] = SortedDict()
-
- self._added = True
- if self._raw_css[media][selector].has_key(prop):
- if (self.is_important(self._raw_css[media][selector][prop]) and self.is_important(new_val)) or not self.is_important(self._raw_css[media][selector][prop]):
- del self._raw_css[media][selector][prop]
- self._raw_css[media][selector][prop] = new_val.strip()
-
- else:
- self._raw_css[media][selector][prop] = new_val.strip()
-
- #Checks if the next word in a string from pos is a CSS property
- def __property_is_next(self, pos):
- istring = self._css[pos: len(self._css)]
- pos = istring.find(':')
- if pos == -1:
- return False;
-
- istring = istring[:pos].strip().lower()
- if data.all_properties.has_key(istring):
- self.log('Added semicolon to the end of declaration', 'Warning')
- return True
-
- return False;
-
- #Checks if a property is valid
- def __property_is_valid(self, prop):
- return (data.all_properties.has_key(prop) and data.all_properties[prop].find(self.getSetting('css_level').upper()) != -1)
-
- #Adds a token to self._tokens
- def __add_token(self, ttype, cssdata, do=False):
- if self.getSetting('preserve_css') or do:
- if ttype == data.COMMENT:
- token = [ttype, cssdata]
- else:
- token = [ttype, cssdata.strip()]
-
- self._tokens.append(token)
-
- #Parse unicode notations and find a replacement character
- def __unicode(self, idx):
- ##FIX##
- return ''
-
- #Starts parsing from URL
- ##USED?
- def __parse_from_url(self, url):
- try:
- if "http" in url.lower() or "https" in url.lower():
- f = urllib.urlopen(url)
- else:
- f = open(url)
-
- data = f.read()
- return self.parse(data)
- finally:
- f.close()
-
- #Checks if there is a token at the current position
- def __is_token(self, string, idx):
- return (string[idx] in data.tokens and not self.escaped(string, idx))
-
-
- #Property Methods
- def _getOutput(self):
- self._output.prepare(self._optimized_css)
- return self._output.render
-
- def _getLog(self):
- ret = ""
- ks = self._log.keys()
- ks.sort()
- for line in ks:
- for msg in self._log[line]:
- ret += "Type: " + msg['t'] + "\n"
- ret += "Message: " + msg['m'] + "\n"
- ret += "\n"
-
- return ret
-
- def _getCSS(self):
- return self._css
-
-
- #Properties
- Output = property(_getOutput, None)
- Log = property(_getLog, None)
- CSS = property(_getCSS, None)
-
-
-if __name__ == '__main__':
- import sys
- tidy = CSSTidy()
- f = open(sys.argv[1], "r")
- css = f.read()
- f.close()
- tidy.parse(css)
- tidy.Output('file', filename="Stylesheet.min.css")
- print tidy.Output()
- #print tidy._import
\ No newline at end of file