You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					37 lines
				
				1.1 KiB
			
		
		
			
		
	
	
					37 lines
				
				1.1 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								import codecs
							 | 
						||
| 
								 | 
							
								import locale
							 | 
						||
| 
								 | 
							
								import re
							 | 
						||
| 
								 | 
							
								import sys
							 | 
						||
| 
								 | 
							
								from typing import List, Tuple
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Byte-order marks paired with the codec used to decode the bytes that
								# follow the mark.  auto_decode() uses the FIRST entry whose BOM prefixes
								# the data, so order matters: the 4-byte UTF-32 marks must come before the
								# 2-byte UTF-16 marks, because the UTF-32-LE BOM (FF FE 00 00) itself
								# starts with the UTF-16-LE BOM (FF FE).  With UTF-16 listed first,
								# UTF-32-LE input would be mis-decoded as UTF-16.
								BOMS: List[Tuple[bytes, str]] = [
								    (codecs.BOM_UTF8, "utf-8"),
								    (codecs.BOM_UTF32, "utf-32"),
								    (codecs.BOM_UTF32_BE, "utf-32-be"),
								    (codecs.BOM_UTF32_LE, "utf-32-le"),
								    (codecs.BOM_UTF16, "utf-16"),
								    (codecs.BOM_UTF16_BE, "utf-16-be"),
								    (codecs.BOM_UTF16_LE, "utf-16-le"),
								]

								# Captures the encoding name from a "coding:" / "coding=" declaration.
								ENCODING_RE = re.compile(br"coding[:=]\s*([-\w.]+)")


								def auto_decode(data: bytes) -> str:
								    """Decode *data* to text, detecting its encoding where possible.

								    Detection order:

								    1. A leading byte-order mark listed in ``BOMS``; the mark is stripped
								       and the remainder decoded with the matching codec.
								    2. A ``coding:`` / ``coding=`` declaration on a ``#`` comment line
								       within the first two lines (as in PEP 263); the whole input is
								       decoded with the declared codec.
								    3. Otherwise ``locale.getpreferredencoding(False)``, falling back to
								       ``sys.getdefaultencoding()``, like ``open()`` on Python 3.

								    Raises:
								        UnicodeDecodeError: if the bytes are not valid in the chosen codec.
								        LookupError: if a declared encoding name is unknown to Python.
								    """
								    for bom, encoding in BOMS:
								        if data.startswith(bom):
								            return data[len(bom) :].decode(encoding)
								    # Let's check the first two lines as in PEP 263.
								    for line in data.split(b"\n")[:2]:
								        # Only a line that begins with "#" can carry the declaration.
								        if not line.startswith(b"#"):
								            continue
								        match = ENCODING_RE.search(line)
								        if match is not None:
								            # The pattern is a bytes regex, so \w matches ASCII only and
								            # the captured name always decodes as ASCII.
								            encoding = match.group(1).decode("ascii")
								            return data.decode(encoding)
								    return data.decode(
								        locale.getpreferredencoding(False) or sys.getdefaultencoding(),
								    )
							 |