我正在尝试使用 XMLHttpRequest(使用最近的 Webkit)下载二进制文件,并使用这个简单的函数对其内容进行 base64 编码:
I'm trying to download a binary file using XMLHttpRequest (using a recent Webkit) and base64-encode its contents using this simple function:
function getBinary(file){
var xhr = new XMLHttpRequest();
xhr.open("GET", file, false);
xhr.overrideMimeType("text/plain; charset=x-user-defined");
xhr.send(null);
return xhr.responseText;
}
function base64encode(binary) {
return btoa(unescape(encodeURIComponent(binary)));
}
var binary = getBinary('http://some.tld/sample.pdf');
var base64encoded = base64encode(binary);
附带说明,以上所有内容都是标准的 Javascript 内容,包括 btoa() 和 encodeURIComponent():https://developer.mozilla.org/en/DOM/window.btoa
As a side note, everything above is standard Javascript stuff, including btoa() and encodeURIComponent(): https://developer.mozilla.org/en/DOM/window.btoa
这很顺利,我什至可以使用 Javascript 解码 base64 内容:
This works pretty smoothly, and I can even decode the base64 contents using Javascript:
function base64decode(base64) {
return decodeURIComponent(escape(atob(base64)));
}
var decodedBinary = base64decode(base64encoded);
decodedBinary === binary // true
现在,我想使用 Python 解码 base64 编码的内容,它使用一些 JSON 字符串来获取 base64encoded 字符串值.天真地这就是我所做的:
Now, I want to decode the base64-encoded contents using Python which consume some JSON string to get the base64encoded string value. Naively this is what I do:
import urllib
import base64
# ... retrieving of base64 encoded string through JSON
base64 = "77+9UE5HDQ……………oaCgA="
source_contents = urllib.unquote(base64.b64decode(base64))
destination_file = open(destination, 'wb')
destination_file.write(source_contents)
destination_file.close()
但生成的文件无效,看起来操作与 UTF-8、编码或我仍然不清楚的东西混淆了.
But the resulting file is invalid, looks like the operation's messaed up with UTF-8, encoding or something which is still unclear to me.
如果我尝试在将 UTF-8 内容放入目标文件之前对其进行解码,则会引发错误:
If I try to decode UTF-8 contents before putting them in the destination file, an error is raised:
import urllib
import base64
# ... retrieving of base64 encoded string through JSON
base64 = "77+9UE5HDQ……………oaCgA="
source_contents = urllib.unquote(base64.b64decode(base64)).decode('utf-8')
destination_file = open(destination, 'wb')
destination_file.write(source_contents)
destination_file.close()
$ python test.py
// ...
UnicodeEncodeError: 'ascii' codec can't encode character u'ufffd' in position 0: ordinal not in range(128)
附带说明,这是同一文件的两个文本表示形式的屏幕截图;左侧:原件;右侧:从 base64 解码字符串创建的字符串:http://cl.ly/0U3G34110z3c132O2e2x
As a side note, here's a screenshot of two textual representations of a same file; on left: the original; on right: the one created from the base64-decoded string: http://cl.ly/0U3G34110z3c132O2e2x
在尝试重新创建文件时,是否有已知的技巧来规避这些编码问题?您自己将如何实现这一目标?
Is there a known trick to circumvent these problems with encoding when attempting to recreating the file? How would you achieve this yourself?
非常感谢任何帮助或提示:)
Any help or hint much appreciated :)
所以我在回答自己——对此感到抱歉——但我认为这对像我这样迷失的人可能有用;)
So I'm answering to myself — and sorry for that — but I think it might be useful for someone as lost as I was ;)
所以你必须使用 ArrayBuffer 并设置 responseType XMLHttpRequest 对象实例的属性到 arraybuffer 用于检索原生字节数组,可以使用以下便捷函数将其转换为 base64(找到 那里,作者在这里可能有福了):
So you have to use ArrayBuffer and set the responseType property of your XMLHttpRequest object instance to arraybuffer for retrieving a native array of Bytes, which can be converted to base64 using the following convenient function (found there, author may be blessed here):
function base64ArrayBuffer(arrayBuffer) {
var base64 = ''
var encodings = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
var bytes = new Uint8Array(arrayBuffer)
var byteLength = bytes.byteLength
var byteRemainder = byteLength % 3
var mainLength = byteLength - byteRemainder
var a, b, c, d
var chunk
// Main loop deals with bytes in chunks of 3
for (var i = 0; i < mainLength; i = i + 3) {
// Combine the three bytes into a single integer
chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2]
// Use bitmasks to extract 6-bit segments from the triplet
a = (chunk & 16515072) >> 18 // 16515072 = (2^6 - 1) << 18
b = (chunk & 258048) >> 12 // 258048 = (2^6 - 1) << 12
c = (chunk & 4032) >> 6 // 4032 = (2^6 - 1) << 6
d = chunk & 63 // 63 = 2^6 - 1
// Convert the raw binary segments to the appropriate ASCII encoding
base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d]
}
// Deal with the remaining bytes and padding
if (byteRemainder == 1) {
chunk = bytes[mainLength]
a = (chunk & 252) >> 2 // 252 = (2^6 - 1) << 2
// Set the 4 least significant bits to zero
b = (chunk & 3) << 4 // 3 = 2^2 - 1
base64 += encodings[a] + encodings[b] + '=='
} else if (byteRemainder == 2) {
chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1]
a = (chunk & 64512) >> 10 // 64512 = (2^6 - 1) << 10
b = (chunk & 1008) >> 4 // 1008 = (2^6 - 1) << 4
// Set the 2 least significant bits to zero
c = (chunk & 15) << 2 // 15 = 2^4 - 1
base64 += encodings[a] + encodings[b] + encodings[c] + '='
}
return base64
}
所以这是一个有效的代码:
So here's a working code:
var xhr = new XMLHttpRequest();
xhr.open('GET', 'http://some.tld/favicon.png', false);
xhr.responseType = 'arraybuffer';
xhr.onload = function(e) {
console.log(base64ArrayBuffer(e.currentTarget.response));
};
xhr.send();
这将记录一个有效表示二进制文件内容的base64编码字符串.
This will log a valid base64 encoded string representing the binary file contents.
对于无法访问 ArrayBuffer 并且 btoa() 编码字符失败的旧浏览器,这是另一种获取任何二进制文件的 base64 编码版本:
For older browsers not having access to ArrayBuffer and having btoa() failing on encoding characters, here's another way to get a base64 encoded version of any binary:
function getBinary(file){
var xhr = new XMLHttpRequest();
xhr.open("GET", file, false);
xhr.overrideMimeType("text/plain; charset=x-user-defined");
xhr.send(null);
return xhr.responseText;
}
function base64Encode(str) {
var CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var out = "", i = 0, len = str.length, c1, c2, c3;
while (i < len) {
c1 = str.charCodeAt(i++) & 0xff;
if (i == len) {
out += CHARS.charAt(c1 >> 2);
out += CHARS.charAt((c1 & 0x3) << 4);
out += "==";
break;
}
c2 = str.charCodeAt(i++);
if (i == len) {
out += CHARS.charAt(c1 >> 2);
out += CHARS.charAt(((c1 & 0x3)<< 4) | ((c2 & 0xF0) >> 4));
out += CHARS.charAt((c2 & 0xF) << 2);
out += "=";
break;
}
c3 = str.charCodeAt(i++);
out += CHARS.charAt(c1 >> 2);
out += CHARS.charAt(((c1 & 0x3) << 4) | ((c2 & 0xF0) >> 4));
out += CHARS.charAt(((c2 & 0xF) << 2) | ((c3 & 0xC0) >> 6));
out += CHARS.charAt(c3 & 0x3F);
}
return out;
}
console.log(base64Encode(getBinary('http://www.google.fr/images/srpr/logo3w.png')));
希望这能像对我一样帮助其他人.
Hope this helps others as it did for me.
这篇关于使用 Javascript 检索二进制文件内容,base64 对其进行编码并使用 Python 对其进行反向解码的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持html5模板网!
即使在调用 abort (jQuery) 之后,浏览器也会等待Browser waits for ajax call to complete even after abort has been called (jQuery)(即使在调用 abort (jQuery) 之后,浏览器也会等待 ajax 调用
JavaScript innerHTML 不适用于 IE?JavaScript innerHTML is not working for IE?(JavaScript innerHTML 不适用于 IE?)
XMLHttpRequest 无法加载,请求的资源上不存在“AXMLHttpRequest cannot load, No #39;Access-Control-Allow-Origin#39; header is present on the requested resource(XMLHttpRequest 无法加载,请求的资
XHR HEAD 请求是否有可能不遵循重定向 (301 302)Is it possible for XHR HEAD requests to not follow redirects (301 302)(XHR HEAD 请求是否有可能不遵循重定向 (301 302))
XMLHttpRequest 206 部分内容XMLHttpRequest 206 Partial Content(XMLHttpRequest 206 部分内容)
XMLHttpRequest 的 getResponseHeader() 的限制?Restrictions of XMLHttpRequest#39;s getResponseHeader()?(XMLHttpRequest 的 getResponseHeader() 的限制?)