#!/bin/bash
# Python package batch deployment.
#
# Downloads the packages listed in ~/.pip/requirement.txt into ~/.pip/cache on
# this host, serves the home directory over HTTP on port 80, and then asks each
# client in 192.168.3.150-200 (over ssh) to install the packages from that
# cache instead of from the public index.

# Work from the home directory: SimpleHTTPServer serves the current directory,
# and the clients fetch http://server/.pip/... relative to it.
cd ~ || exit 1

# Create Cache Directory
mkdir -p ~/.pip/cache

# Create Config Files
# The delimiters are quoted so the bodies are written literally.
cat > ~/.pip/requirement.txt <<'_EOF_'
pymongo
pymysql
redis
_EOF_
cat > ~/.pip/pip.conf <<'_EOF_'
[global]
no-index = true
find-links = http://server/.pip/cache
_EOF_

# Download Packages
# The requirement file lives in ~/.pip/ (written above), not directly in ~/.
# NOTE(review): ~/.pip/pip.conf is also a per-user pip config location, so
# "no-index" may apply to this download as well -- confirm on the server.
pip install -d ~/.pip/cache -r ~/.pip/requirement.txt

# Start HTTP Server
# Runs in the background so the deployment loop below can start.
sudo python -m SimpleHTTPServer 80 &

# Deploy to Clients
for ip in 192.168.3.{150..200}; do
  echo "======$ip======"
  # Each client downloads pip.conf, then installs through it from the cache.
  ssh "client@$ip" 'curl -O http://server/.pip/pip.conf && sudo PIP_CONFIG_FILE=pip.conf pip install -U -r http://server/.pip/requirement.txt'
done
Note: You should configure the clients to:
- Use the same username and password
- Run sudo without password
- Login via ssh private key (otherwise, use sshpass util)
2013-07-23
2013-05-03
How to scrape an obfuscated site? (二)
## Problem
How to scrape an obfuscated site (such as `spys.ru`).
This site tries to stop scrapers by using JavaScript variables that are generated randomly.
I don't care what math it uses. I just translate the JavaScript into Python code word by word.
What I learned is that I should refactor the obfuscated JavaScript code first.
## Solution
### javascript code
eval(
function(p, x){
var r = o = 60;
var s = {};
x = x.split('^');
function y(c){
return (c35 ? String.fromCharCode(c+29) : c.toString(36));
};
while(o--){
s[y(o)] = x[o] || y(o);
}
return p.replace(new RegExp('\\b\\w+\\b','g'), function(y){return s[y]});
}(
'i=D^C;s=B^E;d=F^A;b=3;m=H^G;r=7;n=8;p=9;c=4;e=5;q=J^y;o=u^x;h=1;l=z^w;a=v^I;j=0;k=2;f=S^V;t=6;g=R^Q;K=j^l;M=h^i;U=k^g;P=b^a;O=c^d;N=e^m;L=t^s;X=r^q;W=n^o;T=p^f;',
'^^^^^^^^^^TwoTwoSeven^Six^Two^Seven2One^Eight^ThreeSixZero^SevenEightSix^Seven^Eight9Five^Five^One^NineSevenNine^Four3Three^Three^Nine5Four^Four^EightOneTwo^Nine^SevenNineEight^Zero^6940^2561^80^8085^88^5703^8000^9276^1337^10079^9090^3313^8909^4852^808^2581^SixFourOneNine^Four2FourFive^TwoEightSixSix^ThreeSevenFiveThree^NineOneNineEight^Zero0ThreeFour^1080^9943^5391^OneTwoTwoOne^FiveOneZeroTwo^8118^Nine1EightSeven^One3SevenZero'
)
)
### python code
#!/usr/bin/env python
# crack
# by Kev++@2013-05-03T18:35:06
import re
import string
from functools import reduce  # a builtin on Python 2 only; this import works on 2 and 3
from pprint import pprint


def num_to_str(x, r, tbl=string.digits + string.ascii_lowercase):
    """Return the non-negative integer ``x`` written in radix ``r``.

    ``tbl`` supplies the digit characters; the default covers radix 2..36.
    Raises ValueError for a negative ``x`` or a radix below 2, and IndexError
    if ``r`` needs more digits than ``tbl`` provides.
    """
    if x < 0:
        raise ValueError('x must be non-negative')
    if r < 2:
        raise ValueError('r must be at least 2')
    if x == 0:
        return tbl[0]
    digits = []
    while x:
        x, d = divmod(x, r)
        digits.append(tbl[d])
    return ''.join(reversed(digits))


def build_lookup_table(p, x):
    """Decode the packed script and evaluate its assignments.

    ``p`` is the packed source: ';'-separated ``name=a^b`` statements whose
    word tokens index into ``x``, a '^'-separated dictionary.  Returns a dict
    mapping each decoded variable name to its integer value, where ``^`` is
    evaluated as XOR and operands may name earlier variables.
    """
    r = o = 60
    s = {}
    x = x.split('^')

    def y(c):
        # Token for word index c: 0-9, a-z, then A-Z for 36 and above.
        return ('' if c < r else y(c // r)) + \
            (chr(c % r + 29) if c % r > 35 else num_to_str(c % r, 36))

    for i in range(o):
        # An empty or missing dictionary slot means the token stands for itself.
        s[y(i)] = (x[i] if i < len(x) else '') or y(i)
    p = re.sub(r'\b\w+\b', lambda m: s[m.group(0)], p)

    tbl = dict()
    for stmt in p.strip(';').split(';'):
        if not stmt:
            continue
        k, v = stmt.split('=')
        # Operands are integer literals or names assigned by earlier statements.
        tbl[k] = reduce(lambda a, b: a ^ b,
                        [int(tbl.get(j, j)) for j in v.split('^')])
    return tbl


pprint(build_lookup_table(
    'i=D^C;s=B^E;d=F^A;b=3;m=H^G;r=7;n=8;p=9;c=4;e=5;q=J^y;o=u^x;h=1;l=z^w;a=v^I;j=0;k=2;f=S^V;t=6;g=R^Q;K=j^l;M=h^i;U=k^g;P=b^a;O=c^d;N=e^m;L=t^s;X=r^q;W=n^o;T=p^f;',
    '^^^^^^^^^^TwoTwoSeven^Six^Two^Seven2One^Eight^ThreeSixZero^SevenEightSix^Seven^Eight9Five^Five^One^NineSevenNine^Four3Three^Three^Nine5Four^Four^EightOneTwo^Nine^SevenNineEight^Zero^6940^2561^80^8085^88^5703^8000^9276^1337^10079^9090^3313^8909^4852^808^2581^SixFourOneNine^Four2FourFive^TwoEightSixSix^ThreeSevenFiveThree^NineOneNineEight^Zero0ThreeFour^1080^9943^5391^OneTwoTwoOne^FiveOneZeroTwo^8118^Nine1EightSeven^One3SevenZero'
))
## Result
{'Eight': 5,
'Eight9Five': 8806,
'EightOneTwo': 2637,
'Five': 0,
'FiveOneZeroTwo': 8941,
'Four': 9,
'Four2FourFive': 1976,
'Four3Three': 12345,
'Nine': 7,
'Nine1EightSeven': 1153,
'Nine5Four': 1161,
'NineOneNineEight': 5045,
'NineSevenNine': 5655,
'One': 2,
'One3SevenZero': 2634,
'OneTwoTwoOne': 2736,
'Seven': 1,
'Seven2One': 5041,
'SevenEightSix': 8943,
'SevenNineEight': 1982,
'Six': 3,
'SixFourOneNine': 5655,
'Three': 8,
'ThreeSevenFiveThree': 12348,
'ThreeSixZero': 2745,
'Two': 4,
'TwoEightSixSix': 8807,
'TwoTwoSeven': 2345,
'Zero': 6,
'Zero0ThreeFour': 2346}
## Links
- http://spys.ru/free-proxy-list/CN/
2013-04-28
How to post spam 500 times?
## Problem
You want to post spam 500 times with a single click.
## Solution
' Prerequisite: open "http://weibo.com/kevpp" in the browser first.
' Play this macro 500 times (loop playback); each pass publishes one post.
' Type the message into the post box; {{!NOW:...}} inserts the current date and time.
EVENTS TYPE=KEYPRESS SELECTOR=".input_detail" CHARS="Kev++到此一游({{!NOW:yyyy-mm-ddThh:nn:ss}})\n"
' Click the first link whose text is "发布" (Publish) to submit the post.
TAG POS=1 TYPE=A ATTR=TXT:发布
' Pause for 5 seconds before the next pass.
WAIT SECONDS=5
## Result
abc-spider
Kev++到此一游(2013-04-28T16:42:11)
| 转发| 收藏| 评论
10分钟前 来自新浪微博
abc-spider
Kev++到此一游(2013-04-28T16:42:05)
| 转发| 收藏| 评论
10分钟前 来自新浪微博
abc-spider
Kev++到此一游(2013-04-28T16:41:58)
删除| | 转发| 收藏| 评论
10分钟前 来自新浪微博
abc-spider
Kev++到此一游(2013-04-28T16:41:47)
| 转发| 收藏| 评论
10分钟前 来自新浪微博
abc-spider
Kev++到此一游(2013-04-28T16:41:41)
## Links
- http://weibo.com/kevpp
- https://addons.mozilla.org/en-US/firefox/addon/imacros-for-firefox/?src=search
2013-04-24
How to scrape an obfuscated site? (一)
## Problem
How to scrape an obfuscated site (such as `www.hidemyass.com`)
As you can see below, random HTML tags are injected. I have broken the markup up into multiple lines with indentation.
You need to clean them up to see the real ip-address (displayed as `88.200.222.238`).
> Some people (including me), when confronted with a problem, think
> “I know, I'll use regular expressions.” Now they have two problems.
# Sample obfuscated markup for one proxy entry. The <style> block sets some
# classes to display:none, and decoy numbers sit in hidden <span>/<div> nodes;
# the visible pieces read 88.200.222.238.
s = '''<span>
<style>
.n8jQ{display:none}
.p1Qr{display:inline}
.E3lv{display:none}
.I0ja{display:inline}
.oRy_{display:none}
.FYOA{display:inline}
.oldO{display:none}
.NQ2o{display:inline}
</style>
<span class="n8jQ">54</span>
<span></span>
<div style="display:none">60</div>
<span class="p1Qr">88</span>
<span style="display:none">143</span>
<span class="oRy_">143</span>
<span></span>
<span class="n8jQ">160</span>
<div style="display:none">160</div>
.
<span style="display:none">41</span>
<span class="oldO">41</span>
<div style="display:none">41</div>
<span class="NQ2o">200</span>
<span class="I0ja">.</span>
<span style="display:none">27</span>
<span style="display:none">63</span>
<div style="display:none">63</div>
<span style="display:none">178</span>
<span style="display:none">191</span>
<span class="47">222</span>
.
<div style="display:none">34</div>
<span style="display:none">45</span>
<span class="n8jQ">45</span>
<span class="oldO">229</span>
<span></span>
<span style="display: inline">238</span>
</span>'''
## Solution
def parse_ipaddr(s):
    """Return the visible text of an obfuscated IP-address snippet.

    ``s`` is the HTML for one address: an optional <style> block declaring
    classes as ``display:none`` or ``display:inline``, followed by <span>/<div>
    nodes and bare text.  Hidden nodes are dropped and the text of the visible
    ones is concatenated in document order.
    """
    import re  # local import keeps the snippet self-contained

    # normalize tags: treat <div> like <span>, and wrap bare text nodes
    # (digits/dots sitting directly between tags) in a visible <span>
    txt = re.sub(r'\bdiv\b', 'span', s)
    txt = re.sub(
        r'(?<=>)\s*([.0-9]+)\s*((?=<)(?!</)|(?=</span>$))',
        r'<span style="display:inline">\g<1></span>',
        txt)

    # extract style sheet: class name -> True when the class is visible
    css = {}
    l, r = s.find('<style>'), s.rfind('</style>')
    if l != -1 and r > l:  # without a <style> block there is nothing to parse
        for i in s[l+7:r].strip().splitlines():
            m = re.search(r'\.(?P<key>[^{]+)\{display:(?P<val>none|inline)\}', i)
            if m:
                d = m.groupdict()
                css[d['key']] = d['val'] == 'inline'

    # collect ip parts
    ip_parts = []
    for kind, value, text in re.findall(
            r'<span (class|style)="([^"]+)">([^<>]+)</span>', txt):
        if kind == 'class':
            # a class the style sheet does not mention counts as visible
            if css.get(value, True):
                ip_parts.append(text)
        elif 'inline' in value:
            ip_parts.append(text)
    return ''.join(ip_parts)
## Result
>>> parse_ipaddr(s)
'88.200.222.238'
## Links
- http://www.hidemyass.com/proxy-list/
- http://regex.info/blog/2006-09-15/247
2013-04-11
计算编程高手的平均年龄
Tags:
json
## Problem
What's the average age of StackOverflow top users?
## Solution 1
# Average age of the top StackOverflow users by reputation, using jsawk.
URL='https://api.stackexchange.com/2.1/users?order=desc&sort=reputation&site=stackoverflow'
# Quote the URL: unquoted, its "?" is a glob character and the value is
# subject to word splitting.
# The response is piped through gunzip before the JSON is processed.
curl -s "$URL" |
  gunzip |
  jsawk 'return $$.items' |
  jsawk 'return $$.age' -a 'return $$.reduce(function(x,y){return x+y})/$$.length'
## Solution 2
# Average age of the top StackOverflow users by reputation, using jq.
URL='https://api.stackexchange.com/2.1/users?order=desc&sort=reputation&site=stackoverflow'
# Quote the URL: unquoted, its "?" is a glob character and the value is
# subject to word splitting.
# select(.) drops users whose age is null or missing before averaging.
curl -s "$URL" |
  gunzip |
  jq '[.items[].age | select(.)] | add/length'
## Result
36.72
## Links:
- https://github.com/micha/jsawk
- http://stedolan.github.io/jq/manual/
Subscribe to:
Posts (Atom)