前面操作参考上一篇博客
腾讯地图API爬取街景
1用到工具将经纬度转换成可识别的场景id 坐标转成panoid,执行read.html,将执行好的panoid先复制在word,然后转换成txt,最后将这一列添加到excel
转换工具主要代码
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
<link rel="stylesheet" href="app.css" />
<title>转换</title>
</head>
<body>
<div class="container">
<h2>读取excel(仅读取第一个sheet)</h2>
<div class="mt-sm">
<input type="file" id="file" style="display:none;"
accept="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" />
<a href="javascript:selectFile()">加载本地excel文件</a>
</div>
<div id="panel">
<span>pano:</span>
<span id="panoid"></span>
</div>
<p>结果输出:(下面表格可直接编辑导出)</p>
<button type="button" onclick="translateExcel()" style="margin-bottom:10px">
转化
</button>
<div id="result" contenteditable="">
<table>
<tbody>
<tr>
<th></th>
<th>A</th>
<th>B</th>
<th>C</th>
<th>D</th>
</tr>
<tr>
<td>1</td>
<td>name</td>
<td>经度</td>
<td>纬度</td>
<td>值</td>
</tr>
<tr>
<td>2</td>
<td>人民中路一段</td>
<td>104.0672425</td>
<td>30.65643045</td>
<td></td>
</tr>
</tbody>
</table>
</div>
<h2>导出excel</h2>
<div class="mt-sm" style="padding-bottom:40px;">
<input type="button" onclick="exportExcel()" value="保存" />
上面读取的表格您可以直接编辑,编辑后点击保存即可导出excel文件。
</div>
<script type="text/javascript" src="js/jquery.min.js"></script>
<script type="text/javascript" src="js/xlsx.core.min.js"></script>
<script charset="utf-8" src="https://map.qq.com/api/js?v=2.exp&key=OB4BZ-D4W3U-B7VVO-4PJWW-6TKDJ-WPB77"></script>
<script type="text/javascript">
var book;
var workbook1;
var Bvalue = [];
var Cvalue = [];
var Dvalue = [];
function selectFile() {
document.getElementById("file").click();
}
// 读取本地excel文件
function readWorkbookFromLocalFile(file, callback) {
var reader = new FileReader();
reader.onload = function (e) {
var data = e.target.result;
var workbook = XLSX.read(data, {
type: "binary"
});
if (callback) callback(workbook);
};
reader.readAsBinaryString(file);
}
// 读取 excel文件
function outputWorkbook(workbook) {
var sheetNames = workbook.SheetNames; // 工作表名称集合
sheetNames.forEach(name => {
var worksheet = workbook.Sheets[name]; // 只能通过工作表名称来获取指定工作表
for (var key in worksheet) {
// v是读取单元格的原始值
console.log(
key,
key[0] === "!" ? worksheet[key] : worksheet[key].v
);
}
});
}
function readWorkbook(workbook) {
var sheetNames = workbook.SheetNames; // 工作表名称集合
var worksheet = workbook.Sheets[sheetNames[0]]; // 这里我们只读取第一张sheet
var csv = XLSX.utils.sheet_to_csv(worksheet);
document.getElementById("result").innerHTML = csv2table(csv);
this.book = csv;
}
// 将csv转换成表格
function csv2table(csv) {
var html = "<table>";
var rows = csv.split("\n");
rows.pop(); // 最后一行没用的
rows.forEach(function (row, idx) {
var columns = row.split(",");
columns.unshift(idx + 1); // 添加行索引
if (idx == 0) {
// 添加列索引
html += "<tr>";
for (var i = 0; i < columns.length; i++) {
html +=
"<th>" +
(i == 0 ? "" : String.fromCharCode(65 + i - 1)) +
"</th>";
}
html += "</tr>";
}
html += "<tr>";
columns.forEach(function (column) {
html += "<td>" + column + "</td>";
});
html += "</tr>";
});
html += "</table>";
return html;
}
/**
*输出部分
*/
function table2csv(table) {
var csv = [];
$(table)
.find("tr")
.each(function () {
var temp = [];
$(this)
.find("td")
.each(function () {
temp.push($(this).html());
});
temp.shift(); // 移除第一个
csv.push(temp.join(","));
});
csv.shift();
return csv.join("\n");
}
// csv转sheet对象
function csv2sheet(csv) {
var sheet = {}; // 将要生成的sheet
csv = csv.split("\n");
csv.forEach(function (row, i) {
row = row.split(",");
if (i == 0)
sheet["!ref"] =
"A1:" +
String.fromCharCode(65 + row.length - 1) +
(csv.length - 1);
row.forEach(function (col, j) {
sheet[String.fromCharCode(65 + j) + (i + 1)] = {
v: col
};
});
});
return sheet;
}
// 将一个sheet转成最终的excel文件的blob对象,然后利用URL.createObjectURL下载
function sheet2blob(sheet, sheetName) {
sheetName = sheetName || "sheet1";
var workbook = {
SheetNames: [sheetName],
Sheets: {}
};
workbook.Sheets[sheetName] = sheet;
// 生成excel的配置项
var wopts = {
bookType: "xlsx", // 要生成的文件类型
bookSST: false, // 是否生成Shared String Table,官方解释是,如果开启生成速度会下降,但在低版本IOS设备上有更好的兼容性
type: "binary"
};
var wbout = XLSX.write(workbook, wopts);
var blob = new Blob([s2ab(wbout)], {
type: "application/octet-stream"
});
// 字符串转ArrayBuffer
function s2ab(s) {
var buf = new ArrayBuffer(s.length);
var view = new Uint8Array(buf);
for (var i = 0; i != s.length; ++i)
view[i] = s.charCodeAt(i) & 0xff;
return buf;
}
return blob;
}
/**
* 通用的打开下载对话框方法,没有测试过具体兼容性
* @param url 下载地址,也可以是一个blob对象,必选
* @param saveName 保存文件名,可选
*/
function openDownloadDialog(url, saveName) {
if (typeof url == "object" && url instanceof Blob) {
url = URL.createObjectURL(url); // 创建blob地址
}
var aLink = document.createElement("a");
aLink.href = url;
aLink.download = saveName || ""; // HTML5新增的属性,指定保存文件名,可以不要后缀,注意,file:///模式下不会生效
var event;
if (window.MouseEvent) event = new MouseEvent("click");
else {
event = document.createEvent("MouseEvents");
event.initMouseEvent(
"click",
true,
false,
window,
0,
0,
0,
0,
0,
false,
false,
false,
false,
0,
null
);
}
aLink.dispatchEvent(event);
}
$(function () {
document
.getElementById("file")
.addEventListener("change", function (e) {
var files = e.target.files;
if (files.length == 0) return;
var f = files[0];
if (!/\.xlsx$/g.test(f.name)) {
alert("仅支持读取xlsx格式!");
return;
}
readWorkbookFromLocalFile(f, function (workbook) {
readWorkbook(workbook);
});
});
});
function exportExcel() {
var csv = table2csv($("#result table")[0]);
var sheet = csv2sheet(csv);
var blob = sheet2blob(sheet);
openDownloadDialog(blob, "导出.xlsx");
}
function translateExcel() {
var csv = this.book;
translate(csv);
}
function translate(csv) {
var _this = this;
var rowss = csv.split("\n");
var tb = _this.document.getElementById('result'); //获取表格的dom节点
var array = tb.getElementsByTagName("tr"); //所有tr
var panoService = new qq.maps.PanoramaService();
rowss.pop(); // 最后一行没用的
rowss.forEach(function (rowss, idx) {
var columns = rowss.split(",");
columns.unshift(idx + 1); // 添加行索引
// Bvalue[idx]['lat'] = columns[2];
// Bvalue[idx]['lng'] = columns[3];
var lnglat = {
lat: columns[3],
lng: columns[2]
};
_this.Bvalue[idx] = lnglat;
});
for (var i = 1; i < Bvalue.length; i++) {
var tr = array[i];
var td = tr.getElementsByTagName("td");
//var aim = td[4];
//console.log(aim);
panoService.getPano(Bvalue[i], 1000, function (result) {
var avaul = result ? result.svid : "此处无街景";
// $(tr).find("td").eq(4).text(avaul);
//aim.text(avaul);
//console.log(result);
console.log(avaul);
//console.log(isDOM(tb));
// var td = tb.rows[0].cells[0]; //获取0行0列的td单元格
// td.innerHTML = avual;
// document.getElementById("panoid").innerHTML = result ? result.svid : "此处无街景";
// $(tb).find("td").eq(4).text(avual);
});
sleep(10);
// var avaul = document.getElementById("panoid").innerHTML;
// $(tr).find("td").eq(4).text(avaul);
}
}
function sleep(delay) {
var start = (new Date()).getTime();
while ((new Date()).getTime() - start < delay) {
continue;
}
}
var isDOM = (typeof HTMLElement === 'object') ?
function (obj) {
return obj instanceof HTMLElement;
} :
function (obj) {
return obj && typeof obj === 'object' && obj.nodeType === 1 && typeof obj.nodeName === 'string';
}
</script>
</div>
</body>
</html>
2 然后在streetview下新建2个文件夹,raw_img 和meg_img 两个文件夹,并将要读取的excel转换成csv格式,放在/streetview目录下
3 执行s_2_get_and_merge_img.py文件就可以 不受key限制爬取街景
# -*- coding: utf-8 -*-
# Spider Lv4
# Author: Yue H.W. Luo
# Mail: yue.rimoe@gmail.com
# License : http://www.apache.org/licenses/LICENSE-2.0
# More detial: https://blog.rimoe.xyz/2019/03/14/post01/
"""
## NOTE
Created on Wed Mar 13 21:24:23 2019
This programme is used to get data from Tencent street view.
Get the image of a street view point.
"""
import os
import cv2
import json
import requests
import numpy as np
os.chdir(r'street')
rpath = 'raw_img'
mpath = 'meg_img'
def download(path, svid, x, y):
# down street view image
url = 'https://sv6.map.qq.com/tile?svid={}&x={}&y={}&level=0' +\
'&mtype=mobile-cube&from=web'
path = '{}/id{}_x{}_y{}.jpg'.format(path, svid, x, y)
r = requests.get(url.format(svid, x, y))
with open(path, 'wb') as fd:
fd.write(r.content)
def merge_y(path, svid, x):
# merge y axis
img1 = cv2.imread('%s/id%s_x%s_y0.jpg' % (path, svid, x), 1)
img2 = cv2.imread('%s/id%s_x%s_y1.jpg' % (path, svid, x), 1)
if img1 is None:
download(path, svid, x, 0)
if img2 is None:
download(path, svid, x, 1)
try:
return np.concatenate([img1, img2], axis=0)
except ValueError:
return merge_y(path, svid, x)
def merge_x(path, svid, n):
# merge x axis
x = 2 * n
img1 = merge_y(path, svid, x)
img2 = merge_y(path, svid, x + 1)
return np.concatenate([img1, img2], axis=1)
# read todo list
with open('./TwoRingRoad_火星坐标_9058panoid.csv', 'r') as rf:
rf.readline()
clean = rf.readlines()
done = []
# with open('done_meg.json', 'r') as wf:
# done = json.load(wf)
for svid in clean:
info = svid.split(',')
svid = svid.split(',')[0]
if svid in done:
continue
for n in range(4):
_img = merge_x(rpath, svid, n)
# cv2.imwrite('{}/{}_{}_{}_0_{}.jpg'.format(mpath, info[1], info[2], info[3].replace('\n', ''), n*90), _img)
cv2.imencode('.jpg', _img)[1].tofile('{}/{}_{}_{}_0_{}.jpg'.format(mpath, info[1], info[2], info[3].replace('\n', ''), n*90))
done.append(svid)
# save for interup
print ("done:%s_%s" % (svid, len(done)))
with open('done_meg.json', 'w') as wf:
json.dump(done, wf)
本文介绍了如何利用腾讯地图API进行街景数据的爬取,包括坐标转换为panoid,创建文件夹存储原始和合并的图像,以及执行Python脚本不受key限制地爬取街景图像。

5857

被折叠的 条评论
为什么被折叠?



