Участник:LankLinkBot/zlv.py: различия между версиями

Содержимое удалено Содержимое добавлено

Линейный

Версия от 18:47, 20 июня 2011

Бот для добавления шаблона {{Сообщение ЗЛВ}} на страницы обсуждения статей.

Использование

При запуске без параметров обрабатывается архив текущего месяца. При запуске с параметрами нужно указать два параметра: месяц и год, с которых надо начать обработку архивов. Пример:

$ python zlv.py 2 2005

обработать архивы начиная с февраля 2005 года (с самого начала).

Особенности работы бота

Для правильной вставки иллюстраций они должны располагаться на отдельной строке перед сообщением, к которому относятся (как правило, так и делается).

Сообщения с датами на границе года (XX декабря 20YY года — XX января 20YY+1 года) должны попадать в архив следующего года (сейчас — по-разному).

Исходный код бота

#!/usr/bin/env python
# -*- mode: python; coding: utf-8; -*-
# (c) Lankier mailto:lankier@gmail.com
#
# $Id$
#
import sys
import re
import time
import wikipedia

# Site handle passed to every wikipedia.Page(...) constructed in this script.
# Obtained with no arguments (presumably the framework's configured default
# site -- confirm against the bot's user-config).
site = wikipedia.getSite()
# Hand the framework the text to use for the bot's edits (presumably the edit
# summary -- confirm). The Russian text reads
# "Bot: adding the template [[Template:Сообщение ЗЛВ|Сообщение ЗЛВ]]".
wikipedia.setAction(u'Бот: добавление шаблона [[Шаблон:Сообщение ЗЛВ|Сообщение ЗЛВ]]')

def replace_quote(text):
    '''Swap guillemets for low/high quotes without breaking wiki links.

    Outside of [[...]] every « becomes „ and every » becomes “.
    Inside a link the target must stay untouched, so:
      * [[target|label]]  -> only the label is converted;
      * [[target]] containing guillemets -> rewritten as
        [[target|converted target]] so the link still resolves;
      * links with two or more pipes are copied verbatim.
    If a [[ has no closing ]], the problem is reported through
    wikipedia.output and the remainder of the text is converted as
    plain text.
    '''
    def swap(fragment):
        # plain-text conversion of both guillemets
        return fragment.replace(u'«', u'„').replace(u'»', u'“')

    pieces = []
    pos = 0
    while True:
        opening = text.find('[[', pos)
        if opening < 0:
            break
        # text before the link (includes the ']]' of the previous link)
        pieces.append(swap(text[pos:opening]))
        closing = text.find(']]', opening)
        if closing < 0:
            wikipedia.output('replace_quote error: '+text)
            pos = opening
            break
        target = text[opening+2:closing]
        pipes = target.count('|')
        if pipes == 1:
            page_part, label_part = target.split('|')
            target = page_part+'|'+swap(label_part)
        elif pipes == 0 and (u'«' in target or u'»' in target):
            target = target+'|'+swap(target)
        pieces.append('[[' + target)
        # resume at the closing brackets; they are emitted with the next chunk
        pos = closing
    pieces.append(swap(text[pos:]))
    return ''.join(pieces)

def in_brackets(s, brackets='{}'):
    '''Return the first balanced bracketed span found in s.

    s        -- string to scan.
    brackets -- two-character string: the opening and the closing bracket.

    The result starts at the first opening bracket and ends at the
    closing bracket that brings the nesting depth back to zero (both
    brackets included). An empty string is returned when s contains no
    complete balanced span.

    A closing bracket that appears while nothing is open is ignored;
    previously it drove the depth counter negative, so a later balanced
    span could never be matched.
    '''
    # from retempl.py
    first, last = brackets
    depth = 0
    start = None
    for i, c in enumerate(s):
        if c == first:
            if start is None:
                start = i
            depth += 1
        # Deliberately not "elif": when both bracket characters are the
        # same, a single character opens and immediately closes the span.
        if c == last and depth > 0:
            depth -= 1
            if depth == 0:
                return s[start:i+1]
    return ''

start_month = time.localtime().tm_mon
start_year = time.localtime().tm_year
end_month = start_month
end_year = start_year
def page_gen():
    for y in range(start_year, end_year+1):
        if y < start_year:
            continue
        for m in range(1, 13):
            if y == start_year and m < start_month:
                continue
            if y == end_year and m > end_month:
                break
            pn = u'Википедия:Проект:Знаете_ли_вы/Архив_рубрики/%d-%02d' % (y, m)
            page = wikipedia.Page(site, pn)
            if not page.exists():
                print 'skip:', page
                continue
            yield page, y, m

wpat = re.compile(r"(?:'''[^\]]*?\[\[([^\]]+?)\]\][^\]]*?'''"
                  r"|\[\[([^\|\]]+?)\|'''[^\]]+?'''\]\])", re.U)
def parse_page(page, year, month):
    text = page.get()
    date = None
    image = None
    for line in text.splitlines():
        line = line.strip()
        if line.startswith('=='):
            # date
            d = line.strip('=').strip()
            if re.match(r'[0-9]', d):
                date = d #line.strip('=').strip()
        elif line.startswith('[['): # and line.strip().endswith(']]'):
            # image
            line = line.strip()[2:-2]
            im = line.split('|')[0]
            impage = wikipedia.Page(site, im)
            if impage.namespace() == 6:
                image = impage
        elif date and line.startswith('*'):
            # text
            arch = u' [[Википедия:Проект:Знаете_ли_вы/Архив_рубрики/%d-%02d#%s]]' % (year, month, date)
            line = line.lstrip('*').strip()
            for match in wpat.finditer(line):
                s = match.group(1) or match.group(2)
                assert s
                title = s.split('|')[0]
                link = wikipedia.Page(site, title)
                if link.section():
                    link = wikipedia.Page(site, link.sectionFreeTitle())
                if link.namespace() != 0:
                    continue
                if not link.exists():
                    wikipedia.output('** not exists: '+link.aslink()+arch)
                    continue
                if link.isRedirectPage():
                    print 'redir:', link
                    link = link.getRedirectTarget()
                if link.isDisambig():
                    wikipedia.output('** disambig: '+link.aslink()+arch)
                    continue
                t = line.rstrip(u'.,;…');
                yield link.toggleTalkPage(), t, date, image
            image = None


def format_date(date, year):
    '''Expand an archive heading date into a full date string with the year.

    date -- heading text: a single day ("5 мая") or a range written with
            an em dash or a hyphen ("1—3 мая", "28 февраля — 3 марта").
    year -- year of the archive the heading comes from.

    A heading without a dash is returned with the year appended. For a
    range, the start month defaults to the end month when omitted. A
    December-January range is assumed to end in `year`, so the December
    part gets year-1.

    Raises ValueError if the range does not split into the expected
    parts or a day is not a number.
    '''
    for dash in (u'—', '-'):            # mdash first, then minus
        if dash in date:
            first, last = date.split(dash)
            break
    else:
        return u'%s %s года' % (date, year)

    last_day, last_month = last.split()
    first_parts = first.split()
    if len(first_parts) == 1:
        first_day, first_month = first_parts[0], last_month
    else:
        first_day, first_month = first_parts

    # sanity check: both days must be numeric
    int(first_day)
    int(last_day)

    if first_month == u'декабря' and last_month == u'января':
        return u'%s %s %d года — %s %s %d года' % (
            first_day, first_month, year-1, last_day, last_month, year)
    if first_month == last_month:
        return u'%s-%s %s %d года' % (first_day, last_day, first_month, year)
    return u'%s %s — %s %s %s года' % (
        first_day, first_month, last_day, last_month, year)

# template without image
templ = u'{{Сообщение ЗЛВ|даты=%s|текст=%s|архив=%s}}\n'
# template within image
templ_im = u'{{Сообщение ЗЛВ|даты=%s|текст=%s|иллюстрация=%s|архив=%s}}\n'
def insert_templ(talk, text, date, year, month, image):
    print 'insert_templ:', talk
    if talk.exists():
        ptext = talk.get()
        if u'{{Сообщение ЗЛВ}}' in ptext:
            wikipedia.output('*** need expand: '+talk.aslink())
            #return False
        else:
            for t in (u'Сообщение ЗЛВ',
                     u'Знаете ли вы-статья',
                     u'ЗЛВ'):
                t = t.replace(' ', '[ _]')
                pat = re.compile(r'{{\s*'+t+r'\s*[\|}<]', re.I|re.U|re.S)
                match = pat.search(ptext)
                if match:
                    # template exists
                    wikipedia.output('*** template exists: '+talk.aslink())
                    return False
    text = replace_quote(text)
    d = format_date(date, year)
    arch = '%d-%02d#%s' % (year, month, date)
    if image:
        ptempl = templ_im % (d, text, image.titleWithoutNamespace(), arch)
    else:
        ptempl = templ % (d, text, arch)
    #print ptempl
    if not talk.exists():
        wikipedia.output('new page: '+talk.aslink())
        talk.put(ptempl)
        return True
    ptext = talk.get()
    if u'{{Сообщение ЗЛВ}}' in ptext:
        # replace
        ptext = ptext.replace(u'{{Сообщение ЗЛВ}}', ptempl, 1)
    else:
        # add
        # skip the header templates
        i = 0
        while True:
            match = re.match((r'\s*({{)'), ptext[i:])
            if not match:
                break
            j = match.start(1)
            tt = in_brackets(ptext[i+j:])
            if not tt:
                break
            i = i+j+len(tt)
        if i == 0:
            ptext = ptempl + ptext
        elif len(ptext) > i and ptext[i] == '\n':
            ptext = ptext[:i+1] + ptempl + ptext[i+1:]
        else:
            ptext = ptext[:i]+'\n' + ptempl + ptext[i:]
    # put
    wikipedia.output('== '+talk.aslink()+' ==')
    wikipedia.showDiff(talk.get(), ptext)
    talk.put(ptext)
    return True


if len(sys.argv) > 1:
    start_month = int(sys.argv[1])
    start_year = int(sys.argv[2])
for page, year, month in page_gen():
    print '>>', page, year, month
    for talk, text, date, image in parse_page(page, year, month):
        insert_templ(talk, text, date, year, month, image)

Участник:LankLinkBot/zlv.py: различия между версиями

Версия от 18:47, 20 июня 2011

Использование

Особенности работы бота

Исходный код бота

Навигация

Поиск