django.utils.encoding.py
import types import urllib import locale import datetime import codecs from decimal import Decimal from django.utils.functional import Promise class DjangoUnicodeDecodeError(UnicodeDecodeError): def __init__(self, obj, *args): self.obj = obj UnicodeDecodeError.__init__(self, *args) def __str__(self): original = UnicodeDecodeError.__str__(self) return '%s. You passed in %r (%s)' % (original, self.obj, type(self.obj)) class StrAndUnicode(object): """ A class whose __str__ returns its __unicode__ as a UTF-8 bytestring. Useful as a mix-in. """ def __str__(self): return self.__unicode__().encode('utf-8') def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): """ Returns a unicode object representing 's'. Treats bytestrings using the 'encoding' codec. If strings_only is True, don't convert (some) non-string-like objects. """ if isinstance(s, Promise): # The input is the result of a gettext_lazy() call. return s return force_unicode(s, encoding, strings_only, errors) def is_protected_type(obj): """Determine if the object instance is of a protected type. Objects of protected types are preserved as-is when passed to force_unicode(strings_only=True). """ return isinstance(obj, ( types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float, Decimal) ) def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): """ Similar to smart_unicode, except that lazy instances are resolved to strings, rather than kept as lazy objects. If strings_only is True, don't convert (some) non-string-like objects. """ # Handle the common case first, saves 30-40% in performance when s # is an instance of unicode. This function gets called often in that # setting. 
if isinstance(s, unicode): return s if strings_only and is_protected_type(s): return s try: if not isinstance(s, basestring,): if hasattr(s, '__unicode__'): s = unicode(s) else: try: s = unicode(str(s), encoding, errors) except UnicodeEncodeError: if not isinstance(s, Exception): raise # If we get to here, the caller has passed in an Exception # subclass populated with non-ASCII data without special # handling to display as a string. We need to handle this # without raising a further exception. We do an # approximation to what the Exception's standard str() # output should be. s = ' '.join([force_unicode(arg, encoding, strings_only, errors) for arg in s]) elif not isinstance(s, unicode): # Note: We use .decode() here, instead of unicode(s, encoding, # errors), so that if s is a SafeString, it ends up being a # SafeUnicode at the end. s = s.decode(encoding, errors) except UnicodeDecodeError, e: if not isinstance(s, Exception): raise DjangoUnicodeDecodeError(s, *e.args) else: # If we get to here, the caller has passed in an Exception # subclass populated with non-ASCII bytestring data without a # working unicode method. Try to handle this without raising a # further exception by individually forcing the exception args # to unicode. s = ' '.join([force_unicode(arg, encoding, strings_only, errors) for arg in s]) return s def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): """ Returns a bytestring version of 's', encoded as specified in 'encoding'. If strings_only is True, don't convert (some) non-string-like objects. """ if strings_only and isinstance(s, (types.NoneType, int)): return s if isinstance(s, Promise): return unicode(s).encode(encoding, errors) elif not isinstance(s, basestring): try: return str(s) except UnicodeEncodeError: if isinstance(s, Exception): # An Exception subclass containing non-ASCII data that doesn't # know how to print itself properly. We shouldn't raise a # further exception. 
return ' '.join([smart_str(arg, encoding, strings_only, errors) for arg in s]) return unicode(s).encode(encoding, errors) elif isinstance(s, unicode): return s.encode(encoding, errors) elif s and encoding != 'utf-8': return s.decode('utf-8', errors).encode(encoding, errors) else: return s def iri_to_uri(iri): """ Convert an Internationalized Resource Identifier (IRI) portion to a URI portion that is suitable for inclusion in a URL. This is the algorithm from section 3.1 of RFC 3987. However, since we are assuming input is either UTF-8 or unicode already, we can simplify things a little from the full method. Returns an ASCII string containing the encoded result. """ # The list of safe characters here is constructed from the "reserved" and # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986: # reserved = gen-delims / sub-delims # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" # / "*" / "+" / "," / ";" / "=" # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" # Of the unreserved characters, urllib.quote already considers all but # the ~ safe. # The % character is also added to the list of safe characters here, as the # end of section 3.1 of RFC 3987 specifically mentions that % must not be # converted. if iri is None: return iri return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~") def filepath_to_uri(path): """Convert an file system path to a URI portion that is suitable for inclusion in a URL. We are assuming input is either UTF-8 or unicode already. This method will encode certain chars that would normally be recognized as special chars for URIs. Note that this method does not encode the ' character, as it is a valid character within URIs. See encodeURIComponent() JavaScript function for more details. Returns an ASCII string containing the encoded result. 
""" if path is None: return path # I know about `os.sep` and `os.altsep` but I want to leave # some flexibility for hardcoding separators. return urllib.quote(smart_str(path).replace("\\", "/"), safe="/~!*()'") # The encoding of the default system locale but falls back to the # given fallback encoding if the encoding is unsupported by python or could # not be determined. See tickets #10335 and #5846 try: DEFAULT_LOCALE_ENCODING = locale.getdefaultlocale()[1] or 'ascii' codecs.lookup(DEFAULT_LOCALE_ENCODING) except: DEFAULT_LOCALE_ENCODING = 'ascii'
相关推荐
Django 3.0版本引入了多项改进和新特性,包括对Python 3.8的支持、更好的性能优化以及对Unicode路径的支持。在本项目中,Django将负责处理数据库交互、路由、身份验证和授权等核心功能。 2. Django Rest Framework ...
这是因为默认情况下,`json.dumps()` 方法会将所有非 ASCII 字符转义成 Unicode 编码,而 `HttpResponse` 默认使用的编码可能不支持这些转义后的字符。为了解决这个问题,我们需要做两件事: 1. **设置 `ensure_...
- 在使用Django ORM时,如果使用FileInfo类的__unicode__方法,应该返回具体的属性名,如self.file_name,而不是self.name。这涉及到对象属性访问错误,通常因为类定义不正确或方法引用了不存在的属性。 - 当使用...
django-queryset-csv, 用于 Django querysets的CSV导出程序 用于 Django querysets的CSV导出程序。这个工具是因为在 Django 中需要反复执行以下操作而创建的:编写基于简单...自动将unicode字符编码为 UTF-8;创建一个将qu
Django 中的 Unicode Django开源项目 了解 Django 项目本身的开发进程以及您如何为 Django 做贡献: 社区: 如何参与其中 | 发布进程 | 团队组织 | Django 源代码仓库 | 安全政策 | 邮件列表 设计哲学: 概览 文档...
在处理Unicode文本时,BOM是一个特殊的字符序列,用于标识文本编码的字节顺序。在某些情况下,BOM可能会引起解析问题,特别是当它不被预期或处理不当的时候。Django-BOM库提供了工具和方法来自动检测并处理这些情况...
在Python的Web开发框架Django中,为了提高应用性能和用户体验,我们经常需要处理异步任务。Celery是一个强大的分布式任务队列,它允许我们在Django应用中轻松地实现异步任务,包括信号处理。本篇文章将深入探讨如何...
MySQL通常使用`utf8mb4`编码来支持Unicode,因此需要确保在settings.py中设置正确的字符集: ```python DATABASES['default']['OPTIONS'] = {'charset': 'utf8mb4'} ``` 此外,如果你的Django模型中包含了日期或...
Django 2.2 默认支持Unicode编码,因此建议所有文件,特别是处理文本内容的文件,如HTML模板,都采用UTF-8编码。在读取文件时,应明确指定`encoding='utf-8'`,例如: ```python with Path(CURRENT_DIR, '...
Django强调DRY(Don't Repeat Yourself)原则,避免重复编码,提高开发效率。 **二、Emoticons和django_emoticons库** 表情符号在现代社交媒体和在线通信中扮演着重要角色,它们可以增强文本的情感表达。`django_...
这可能包括使用图标名称、Unicode码点或者直接链接来获取图标,使得UI设计更加灵活和多样化。 **后端开发** 后端开发是指构建服务器端应用程序的过程,包括数据库设计、服务器端逻辑和API接口等。`django_xicon` ...
需要注意的是,在Python 2.7环境下,获取到的数据是Unicode编码的,有时需要转换为str类型才能进一步处理。 对于数组型数据,如果尝试用获取字符串的方式去获取,可能会得到None。正确的做法是使用`request.GET....
mysql> CREATE DATABASE demo_django_supermarket DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; ``` - 检查数据库默认编码 ```mysql mysql> USE demo_django_supermarket; mysql> SELECT @@...
此外,`urllib`中的`urlencode`函数用于编码URL参数,但不支持直接处理Unicode对象,需要先进行编码转换。 随着Python的发展,`requests`库已经成为处理HTTP请求的首选库,因为它具有更简洁的API和更好的错误处理。...
在Python 3中,str类型没有decode方法,因为它本身已经是Unicode字符串。 - 如果代码中尝试对一个str类型使用decode方法,会导致错误。正确的做法是将str编码成bytes类型后再进行decode操作。 4. ...
总之,处理Django项目中的中文乱码问题通常涉及到文件编码、数据库配置、HTML模板、Django设置以及系统和邮件编码等方面。确保整个应用链路中的编码一致且支持UTF-8是解决这类问题的关键。在实际开发中,遇到此类...
需要注意的是,在Django 1.7及更高版本中,更推荐使用`JsonResponse`类来创建JSON响应,因为这个类提供了更多的便利性和安全性,例如自动设置正确的Content-Type头和处理Unicode编码: ```python from django....
3. **Python 3**:Python 3是Python语言的最新版本,具有更现代的语法和功能,相比Python 2有诸多改进,包括更好的字符串处理、新的内存管理以及对Unicode的全面支持。 4. **Python库的发布与安装**:在Python社区...
总的来说,Django提供了多种方式来处理URL重定向,可以根据项目需求选择最适合的方法。在URL配置中设置重定向简洁明了,视图中的`HttpResponseRedirect`适合视图逻辑后的跳转,而`redirects`应用则适用于管理大量和...
大部分情况下,Django框架会默认使用unicode编码,这样可以支持多语言内容的存储。如果遇到了乱码问题,要检查是否是在数据上传过程中某些环节未能正确处理编码转换。 5. Python代码编码规范:有些开发者在Python...